1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=SKX
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ --check-prefix=AVX512DQNOBW
; Masked zero-extending load: reads <8 x i8> through %i (align 1), widens it to
; <8 x i16> with zext, and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations are a vpmovzxbw load followed by a
; vpsllw/vpsraw $15 shift pair and a vpand; the SKX expectation is
; vpsllw + vpmovw2m feeding a zero-masked ({%k1} {z}) vpmovzxbw load.
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
6 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
7 ; KNL-LABEL: zext_8x8mem_to_8x16:
9 ; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
10 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
11 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
12 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
15 ; SKX-LABEL: zext_8x8mem_to_8x16:
17 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
18 ; SKX-NEXT: vpmovw2m %xmm0, %k1
19 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
22 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
23 ; AVX512DQNOBW: # %bb.0:
24 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
25 ; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
26 ; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
27 ; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
28 ; AVX512DQNOBW-NEXT: retq
29 %a = load <8 x i8>,<8 x i8> *%i,align 1
30 %x = zext <8 x i8> %a to <8 x i16>
31 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Masked sign-extending load: reads <8 x i8> through %i (align 1), widens it to
; <8 x i16> with sext, and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations are "vpmovsxbw (%rdi)" followed by a
; vpsllw/vpsraw $15 shift pair and a vpand; the SKX expectation is
; vpsllw + vpmovw2m feeding a zero-masked "vpmovsxbw (%rdi)".
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
35 define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
36 ; KNL-LABEL: sext_8x8mem_to_8x16:
38 ; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
39 ; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
40 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
41 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
44 ; SKX-LABEL: sext_8x8mem_to_8x16:
46 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
47 ; SKX-NEXT: vpmovw2m %xmm0, %k1
48 ; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
51 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
52 ; AVX512DQNOBW: # %bb.0:
53 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
54 ; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
55 ; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
56 ; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
57 ; AVX512DQNOBW-NEXT: retq
58 %a = load <8 x i8>,<8 x i8> *%i,align 1
59 %x = sext <8 x i8> %a to <8 x i16>
60 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Masked zero-extending load: reads <16 x i8> through %i (align 1), widens it to
; <16 x i16> with zext, and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations are a register vpmovzxbw (xmm0 to ymm0),
; a vpmovzxbw load into ymm1, a vpsllw/vpsraw $15 shift pair and a vpand; the
; SKX expectation is vpsllw $7 + vpmovb2m feeding a zero-masked vpmovzxbw load.
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
65 define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
66 ; KNL-LABEL: zext_16x8mem_to_16x16:
68 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
69 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
70 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
71 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
72 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
75 ; SKX-LABEL: zext_16x8mem_to_16x16:
77 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
78 ; SKX-NEXT: vpmovb2m %xmm0, %k1
79 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
82 ; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x16:
83 ; AVX512DQNOBW: # %bb.0:
84 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
85 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
86 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
87 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
88 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
89 ; AVX512DQNOBW-NEXT: retq
90 %a = load <16 x i8>,<16 x i8> *%i,align 1
91 %x = zext <16 x i8> %a to <16 x i16>
92 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Masked sign-extending load: reads <16 x i8> through %i (align 1), widens it to
; <16 x i16> with sext, and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations are a register vpmovzxbw (xmm0 to ymm0),
; "vpmovsxbw (%rdi), %ymm1", a vpsllw/vpsraw $15 shift pair and a vpand; the SKX
; expectation is vpsllw $7 + vpmovb2m feeding a zero-masked "vpmovsxbw (%rdi)".
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
96 define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
97 ; KNL-LABEL: sext_16x8mem_to_16x16:
99 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
100 ; KNL-NEXT: vpmovsxbw (%rdi), %ymm1
101 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
102 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
103 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
106 ; SKX-LABEL: sext_16x8mem_to_16x16:
108 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
109 ; SKX-NEXT: vpmovb2m %xmm0, %k1
110 ; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
113 ; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x16:
114 ; AVX512DQNOBW: # %bb.0:
115 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
116 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm1
117 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
118 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
119 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm0, %ymm0
120 ; AVX512DQNOBW-NEXT: retq
121 %a = load <16 x i8>,<16 x i8> *%i,align 1
122 %x = sext <16 x i8> %a to <16 x i16>
123 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked register zero-extension: widens the <16 x i8> argument %a to
; <16 x i16> with zext. All three RUN configurations share one expectation
; (the ALL prefix), a single vpmovzxbw from xmm0 into ymm0.
127 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
128 ; ALL-LABEL: zext_16x8_to_16x16:
130 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
132 %x = zext <16 x i8> %a to <16 x i16>
; Masked register zero-extension: widens the <16 x i8> argument %a to
; <16 x i16> with zext and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations widen both xmm1 and xmm0 with vpmovzxbw,
; run a vpsllw/vpsraw $15 shift pair on ymm1 and combine with vpand; the SKX
; expectation is vpsllw $7 + vpmovb2m on xmm1 feeding a zero-masked vpmovzxbw.
136 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
137 ; KNL-LABEL: zext_16x8_to_16x16_mask:
139 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
140 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
141 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
142 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
143 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
146 ; SKX-LABEL: zext_16x8_to_16x16_mask:
148 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
149 ; SKX-NEXT: vpmovb2m %xmm1, %k1
150 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
153 ; AVX512DQNOBW-LABEL: zext_16x8_to_16x16_mask:
154 ; AVX512DQNOBW: # %bb.0:
155 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
156 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
157 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
158 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
159 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
160 ; AVX512DQNOBW-NEXT: retq
161 %x = zext <16 x i8> %a to <16 x i16>
162 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Unmasked register sign-extension: widens the <16 x i8> argument %a to
; <16 x i16> with sext. All three RUN configurations share one expectation
; (the ALL prefix), a single "vpmovsxbw %xmm0, %ymm0".
166 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
167 ; ALL-LABEL: sext_16x8_to_16x16:
169 ; ALL-NEXT: vpmovsxbw %xmm0, %ymm0
171 %x = sext <16 x i8> %a to <16 x i16>
; Masked register sign-extension: widens the <16 x i8> argument %a to
; <16 x i16> with sext and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations widen xmm1 with vpmovzxbw and xmm0 with
; vpmovsxbw, run a vpsllw/vpsraw $15 shift pair on ymm1 and combine with vpand;
; the SKX expectation is vpsllw $7 + vpmovb2m on xmm1 feeding a zero-masked
; vpmovsxbw.
175 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
176 ; KNL-LABEL: sext_16x8_to_16x16_mask:
178 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
179 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
180 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
181 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
182 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
185 ; SKX-LABEL: sext_16x8_to_16x16_mask:
187 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
188 ; SKX-NEXT: vpmovb2m %xmm1, %k1
189 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
192 ; AVX512DQNOBW-LABEL: sext_16x8_to_16x16_mask:
193 ; AVX512DQNOBW: # %bb.0:
194 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
195 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
196 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
197 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
198 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
199 ; AVX512DQNOBW-NEXT: retq
200 %x = sext <16 x i8> %a to <16 x i16>
201 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Masked zero-extending load: reads <32 x i8> through %i (align 1), widens it to
; <32 x i16> with zext, and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations work on two ymm halves: vextracti128
; plus two register vpmovzxbw widenings, two vpmovzxbw loads, and a
; vpsllw/vpsraw/vpand sequence per half. The SKX expectation is vpsllw $7 +
; vpmovb2m feeding one zero-masked vpmovzxbw load into zmm0.
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
205 define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
206 ; KNL-LABEL: zext_32x8mem_to_32x16:
208 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
209 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
210 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
211 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
212 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
213 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
214 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
215 ; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0
216 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
217 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
218 ; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
221 ; SKX-LABEL: zext_32x8mem_to_32x16:
223 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
224 ; SKX-NEXT: vpmovb2m %ymm0, %k1
225 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
228 ; AVX512DQNOBW-LABEL: zext_32x8mem_to_32x16:
229 ; AVX512DQNOBW: # %bb.0:
230 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
231 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
232 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
233 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
234 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
235 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
236 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
237 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm0, %ymm0
238 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
239 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
240 ; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm1, %ymm1
241 ; AVX512DQNOBW-NEXT: retq
242 %a = load <32 x i8>,<32 x i8> *%i,align 1
243 %x = zext <32 x i8> %a to <32 x i16>
244 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Masked sign-extending load: reads <32 x i8> through %i (align 1), widens it to
; <32 x i16> with sext, and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations work on two ymm halves: vextracti128
; plus two register vpmovzxbw widenings, vpmovsxbw loads from 16(%rdi) and
; (%rdi), and a vpsllw/vpsraw/vpand sequence per half. The SKX expectation is
; vpsllw $7 + vpmovb2m feeding one zero-masked "vpmovsxbw (%rdi), %zmm0".
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
248 define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
249 ; KNL-LABEL: sext_32x8mem_to_32x16:
251 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
252 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
253 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
254 ; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2
255 ; KNL-NEXT: vpmovsxbw (%rdi), %ymm3
256 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
257 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
258 ; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0
259 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
260 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
261 ; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
264 ; SKX-LABEL: sext_32x8mem_to_32x16:
266 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
267 ; SKX-NEXT: vpmovb2m %ymm0, %k1
268 ; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
271 ; AVX512DQNOBW-LABEL: sext_32x8mem_to_32x16:
272 ; AVX512DQNOBW: # %bb.0:
273 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
274 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
275 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
276 ; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm2
277 ; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm3
278 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
279 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
280 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm0, %ymm0
281 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
282 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
283 ; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm1, %ymm1
284 ; AVX512DQNOBW-NEXT: retq
285 %a = load <32 x i8>,<32 x i8> *%i,align 1
286 %x = sext <32 x i8> %a to <32 x i16>
287 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked register zero-extension: widens the <32 x i8> argument %a to
; <32 x i16> with zext.
; The KNL and AVX512DQNOBW expectations produce two ymm results (vpmovzxbw of
; xmm0, vextracti128 of the upper half, a second vpmovzxbw, then a vmovdqa);
; the SKX expectation is a single vpmovzxbw from ymm0 into zmm0.
291 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
292 ; KNL-LABEL: zext_32x8_to_32x16:
294 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
295 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
296 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
297 ; KNL-NEXT: vmovdqa %ymm2, %ymm0
300 ; SKX-LABEL: zext_32x8_to_32x16:
302 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
305 ; AVX512DQNOBW-LABEL: zext_32x8_to_32x16:
306 ; AVX512DQNOBW: # %bb.0:
307 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
308 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
309 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
310 ; AVX512DQNOBW-NEXT: vmovdqa %ymm2, %ymm0
311 ; AVX512DQNOBW-NEXT: retq
312 %x = zext <32 x i8> %a to <32 x i16>
; Masked register zero-extension: widens the <32 x i8> argument %a to
; <32 x i16> with zext and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations split both ymm1 and ymm0 with
; vextracti128, widen all four halves with vpmovzxbw, and run a
; vpsllw/vpsraw/vpand sequence per half. The SKX expectation is vpsllw $7 +
; vpmovb2m on ymm1 feeding one zero-masked vpmovzxbw from ymm0 into zmm0.
316 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
317 ; KNL-LABEL: zext_32x8_to_32x16_mask:
319 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
320 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
321 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
322 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3
323 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
324 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
325 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
326 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
327 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
328 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm1
329 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
330 ; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
333 ; SKX-LABEL: zext_32x8_to_32x16_mask:
335 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
336 ; SKX-NEXT: vpmovb2m %ymm1, %k1
337 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
340 ; AVX512DQNOBW-LABEL: zext_32x8_to_32x16_mask:
341 ; AVX512DQNOBW: # %bb.0:
342 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
343 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
344 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
345 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm3
346 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
347 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
348 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
349 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
350 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
351 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm1
352 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
353 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm1, %ymm1
354 ; AVX512DQNOBW-NEXT: retq
355 %x = zext <32 x i8> %a to <32 x i16>
356 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Unmasked register sign-extension: widens the <32 x i8> argument %a to
; <32 x i16> with sext.
; The KNL and AVX512DQNOBW expectations produce two ymm results (vpmovsxbw of
; xmm0, vextracti128 of the upper half, a second vpmovsxbw, then a vmovdqa);
; the SKX expectation is a single "vpmovsxbw %ymm0, %zmm0".
360 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
361 ; KNL-LABEL: sext_32x8_to_32x16:
363 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm2
364 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
365 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm1
366 ; KNL-NEXT: vmovdqa %ymm2, %ymm0
369 ; SKX-LABEL: sext_32x8_to_32x16:
371 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0
374 ; AVX512DQNOBW-LABEL: sext_32x8_to_32x16:
375 ; AVX512DQNOBW: # %bb.0:
376 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm2
377 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
378 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm1
379 ; AVX512DQNOBW-NEXT: vmovdqa %ymm2, %ymm0
380 ; AVX512DQNOBW-NEXT: retq
381 %x = sext <32 x i8> %a to <32 x i16>
; Masked register sign-extension: widens the <32 x i8> argument %a to
; <32 x i16> with sext and selects the result against zeroinitializer under %mask.
; The KNL and AVX512DQNOBW expectations split both ymm1 and ymm0 with
; vextracti128, widen the ymm1 halves with vpmovzxbw and the ymm0 halves with
; vpmovsxbw, and run a vpsllw/vpsraw/vpand sequence per half. The SKX
; expectation is vpsllw $7 + vpmovb2m on ymm1 feeding one zero-masked
; "vpmovsxbw %ymm0, %zmm0".
385 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
386 ; KNL-LABEL: sext_32x8_to_32x16_mask:
388 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
389 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
390 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
391 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3
392 ; KNL-NEXT: vpmovsxbw %xmm3, %ymm3
393 ; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
394 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
395 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
396 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
397 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm1
398 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
399 ; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1
402 ; SKX-LABEL: sext_32x8_to_32x16_mask:
404 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
405 ; SKX-NEXT: vpmovb2m %ymm1, %k1
406 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
409 ; AVX512DQNOBW-LABEL: sext_32x8_to_32x16_mask:
410 ; AVX512DQNOBW: # %bb.0:
411 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
412 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
413 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
414 ; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm3
415 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm3, %ymm3
416 ; AVX512DQNOBW-NEXT: vpmovsxbw %xmm0, %ymm0
417 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
418 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
419 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
420 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm2, %ymm1
421 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
422 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm1, %ymm1
423 ; AVX512DQNOBW-NEXT: retq
424 %x = sext <32 x i8> %a to <32 x i16>
425 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Masked zero-extending load: reads <4 x i8> through %i (align 1), widens it to
; <4 x i32> with zext, and selects the result against zeroinitializer under %mask.
; The KNL expectation is vpslld $31 + vptestmd on zmm0 into %k1, an unmasked
; vpmovzxbd load, a zero-masked vmovdqa32 on zmm0 and a vzeroupper. The two
; RUN lines sharing the AVX512DQ prefix expect vpslld $31 + vpmovd2m feeding a
; zero-masked vpmovzxbd load into xmm0.
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
429 define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
430 ; KNL-LABEL: zext_4x8mem_to_4x32:
432 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
433 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
434 ; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
435 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
436 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
437 ; KNL-NEXT: vzeroupper
440 ; AVX512DQ-LABEL: zext_4x8mem_to_4x32:
442 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
443 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
444 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
445 ; AVX512DQ-NEXT: retq
446 %a = load <4 x i8>,<4 x i8> *%i,align 1
447 %x = zext <4 x i8> %a to <4 x i32>
448 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked sign-extending load: reads <4 x i8> through %i (align 1), widens it to
; <4 x i32> with sext, and selects the result against zeroinitializer under %mask.
; The KNL expectation is vpslld $31 + vptestmd on zmm0 into %k1, an unmasked
; "vpmovsxbd (%rdi), %xmm0", a zero-masked vmovdqa32 on zmm0 and a vzeroupper.
; The two RUN lines sharing the AVX512DQ prefix expect vpslld $31 + vpmovd2m
; feeding a zero-masked "vpmovsxbd (%rdi), %xmm0".
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
452 define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
453 ; KNL-LABEL: sext_4x8mem_to_4x32:
455 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
456 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
457 ; KNL-NEXT: vpmovsxbd (%rdi), %xmm0
458 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
459 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
460 ; KNL-NEXT: vzeroupper
463 ; AVX512DQ-LABEL: sext_4x8mem_to_4x32:
465 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
466 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
467 ; AVX512DQ-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
468 ; AVX512DQ-NEXT: retq
469 %a = load <4 x i8>,<4 x i8> *%i,align 1
470 %x = sext <4 x i8> %a to <4 x i32>
471 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Masked zero-extending load: reads <8 x i8> through %i (align 1), widens it to
; <8 x i32> with zext, and selects the result against zeroinitializer under %mask.
; Each configuration forms %k1 differently before the vpmovzxbd load:
;   KNL          - vpmovsxwq to zmm0, vpsllq $63, vptestmq; the load is unmasked
;                  and a zero-masked vmovdqa32 on zmm0 follows.
;   SKX          - vpsllw $15 + vpmovw2m; the load itself is zero-masked.
;   AVX512DQNOBW - vpmovsxwd to ymm0, vpslld $31, vpmovd2m; the load itself is
;                  zero-masked.
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
475 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
476 ; KNL-LABEL: zext_8x8mem_to_8x32:
478 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
479 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
480 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
481 ; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
482 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
483 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
486 ; SKX-LABEL: zext_8x8mem_to_8x32:
488 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
489 ; SKX-NEXT: vpmovw2m %xmm0, %k1
490 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
493 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
494 ; AVX512DQNOBW: # %bb.0:
495 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
496 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
497 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
498 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
499 ; AVX512DQNOBW-NEXT: retq
500 %a = load <8 x i8>,<8 x i8> *%i,align 1
501 %x = zext <8 x i8> %a to <8 x i32>
502 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked sign-extending load: reads <8 x i8> through %i (align 1), widens it to
; <8 x i32> with sext, and selects the result against zeroinitializer under %mask.
; Each configuration forms %k1 differently before "vpmovsxbd (%rdi), %ymm0":
;   KNL          - vpmovsxwq to zmm0, vpsllq $63, vptestmq; the load is unmasked
;                  and a zero-masked vmovdqa32 on zmm0 follows.
;   SKX          - vpsllw $15 + vpmovw2m; the load itself is zero-masked.
;   AVX512DQNOBW - vpmovsxwd to ymm0, vpslld $31, vpmovd2m; the load itself is
;                  zero-masked.
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
506 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
507 ; KNL-LABEL: sext_8x8mem_to_8x32:
509 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
510 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
511 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
512 ; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
513 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
514 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
517 ; SKX-LABEL: sext_8x8mem_to_8x32:
519 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
520 ; SKX-NEXT: vpmovw2m %xmm0, %k1
521 ; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
524 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
525 ; AVX512DQNOBW: # %bb.0:
526 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
527 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
528 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
529 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
530 ; AVX512DQNOBW-NEXT: retq
531 %a = load <8 x i8>,<8 x i8> *%i,align 1
532 %x = sext <8 x i8> %a to <8 x i32>
533 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Masked zero-extending load: reads <16 x i8> through %i (align 1), widens it to
; <16 x i32> with zext, and selects the result against zeroinitializer under %mask.
; All three configurations end in a zero-masked vpmovzxbd load into zmm0; they
; differ only in how %k1 is formed:
;   KNL          - vpmovsxbd to zmm0, vpslld $31, vptestmd.
;   SKX          - vpsllw $7 + vpmovb2m.
;   AVX512DQNOBW - vpmovsxbd to zmm0, vpslld $31, vpmovd2m.
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
537 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
538 ; KNL-LABEL: zext_16x8mem_to_16x32:
540 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
541 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
542 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
543 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
546 ; SKX-LABEL: zext_16x8mem_to_16x32:
548 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
549 ; SKX-NEXT: vpmovb2m %xmm0, %k1
550 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
553 ; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32:
554 ; AVX512DQNOBW: # %bb.0:
555 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
556 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
557 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
558 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
559 ; AVX512DQNOBW-NEXT: retq
560 %a = load <16 x i8>,<16 x i8> *%i,align 1
561 %x = zext <16 x i8> %a to <16 x i32>
562 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Masked sign-extending load: reads <16 x i8> through %i (align 1), widens it to
; <16 x i32> with sext, and selects the result against zeroinitializer under %mask.
; All three configurations end in a zero-masked "vpmovsxbd (%rdi), %zmm0"; they
; differ only in how %k1 is formed:
;   KNL          - vpmovsxbd to zmm0, vpslld $31, vptestmd.
;   SKX          - vpsllw $7 + vpmovb2m.
;   AVX512DQNOBW - vpmovsxbd to zmm0, vpslld $31, vpmovd2m.
; NOTE(review): the function is declared readnone yet loads through %i - confirm
; the attribute is intentional.
566 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
567 ; KNL-LABEL: sext_16x8mem_to_16x32:
569 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
570 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
571 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
572 ; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
575 ; SKX-LABEL: sext_16x8mem_to_16x32:
577 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
578 ; SKX-NEXT: vpmovb2m %xmm0, %k1
579 ; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
582 ; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32:
583 ; AVX512DQNOBW: # %bb.0:
584 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
585 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
586 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
587 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
588 ; AVX512DQNOBW-NEXT: retq
589 %a = load <16 x i8>,<16 x i8> *%i,align 1
590 %x = sext <16 x i8> %a to <16 x i32>
591 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Test: zext the register operand %a from <16 x i8> to <16 x i32>, then select
; zero for the lanes whose %mask bit is false.
595 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
596 ; KNL-LABEL: zext_16x8_to_16x32_mask:
598 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
599 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
600 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
601 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
604 ; SKX-LABEL: zext_16x8_to_16x32_mask:
606 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
607 ; SKX-NEXT: vpmovb2m %xmm1, %k1
608 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
611 ; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask:
612 ; AVX512DQNOBW: # %bb.0:
613 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
614 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
615 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
616 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
617 ; AVX512DQNOBW-NEXT: retq
618 %x = zext <16 x i8> %a to <16 x i32>
619 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Test: sext the register operand %a from <16 x i8> to <16 x i32>, then select
; zero for the lanes whose %mask bit is false.
623 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
624 ; KNL-LABEL: sext_16x8_to_16x32_mask:
626 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
627 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
628 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
629 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
632 ; SKX-LABEL: sext_16x8_to_16x32_mask:
634 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
635 ; SKX-NEXT: vpmovb2m %xmm1, %k1
636 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
639 ; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask:
640 ; AVX512DQNOBW: # %bb.0:
641 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
642 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
643 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
644 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
645 ; AVX512DQNOBW-NEXT: retq
646 %x = sext <16 x i8> %a to <16 x i32>
647 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Test: unmasked zext of the register operand %i from <16 x i8> to <16 x i32>.
; Only the shared ALL prefix is checked for this function.
651 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
652 ; ALL-LABEL: zext_16x8_to_16x32:
654 ; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
656 %x = zext <16 x i8> %i to <16 x i32>
; Test: unmasked sext of the register operand %i from <16 x i8> to <16 x i32>.
; Only the shared ALL prefix is checked for this function.
660 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
661 ; ALL-LABEL: sext_16x8_to_16x32:
663 ; ALL-NEXT: vpmovsxbd %xmm0, %zmm0
665 %x = sext <16 x i8> %i to <16 x i32>
; Test: load <2 x i8> from %i, zext to <2 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
669 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
670 ; KNL-LABEL: zext_2x8mem_to_2x64:
672 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
673 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
674 ; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
675 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
676 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
677 ; KNL-NEXT: vzeroupper
680 ; AVX512DQ-LABEL: zext_2x8mem_to_2x64:
682 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
683 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
684 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
685 ; AVX512DQ-NEXT: retq
686 %a = load <2 x i8>,<2 x i8> *%i,align 1
687 %x = zext <2 x i8> %a to <2 x i64>
688 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Test: load <2 x i8> from %i, sext to <2 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
691 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
692 ; KNL-LABEL: sext_2x8mem_to_2x64mask:
694 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
695 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
696 ; KNL-NEXT: vpmovsxbq (%rdi), %xmm0
697 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
698 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
699 ; KNL-NEXT: vzeroupper
702 ; AVX512DQ-LABEL: sext_2x8mem_to_2x64mask:
704 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
705 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
706 ; AVX512DQ-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
707 ; AVX512DQ-NEXT: retq
708 %a = load <2 x i8>,<2 x i8> *%i,align 1
709 %x = sext <2 x i8> %a to <2 x i64>
710 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Test: unmasked load of <2 x i8> from %i followed by sext to <2 x i64>.
; Only the shared ALL prefix is checked for this function.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
713 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
714 ; ALL-LABEL: sext_2x8mem_to_2x64:
716 ; ALL-NEXT: vpmovsxbq (%rdi), %xmm0
718 %a = load <2 x i8>,<2 x i8> *%i,align 1
719 %x = sext <2 x i8> %a to <2 x i64>
; Test: load <4 x i8> from %i, zext to <4 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
723 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
724 ; KNL-LABEL: zext_4x8mem_to_4x64:
726 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
727 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
728 ; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
729 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
730 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
733 ; AVX512DQ-LABEL: zext_4x8mem_to_4x64:
735 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
736 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
737 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
738 ; AVX512DQ-NEXT: retq
739 %a = load <4 x i8>,<4 x i8> *%i,align 1
740 %x = zext <4 x i8> %a to <4 x i64>
741 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Test: load <4 x i8> from %i, sext to <4 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
745 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
746 ; KNL-LABEL: sext_4x8mem_to_4x64mask:
748 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
749 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
750 ; KNL-NEXT: vpmovsxbq (%rdi), %ymm0
751 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
752 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
755 ; AVX512DQ-LABEL: sext_4x8mem_to_4x64mask:
757 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
758 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
759 ; AVX512DQ-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
760 ; AVX512DQ-NEXT: retq
761 %a = load <4 x i8>,<4 x i8> *%i,align 1
762 %x = sext <4 x i8> %a to <4 x i64>
763 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Test: unmasked load of <4 x i8> from %i followed by sext to <4 x i64>.
; Only the shared ALL prefix is checked for this function.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
767 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
768 ; ALL-LABEL: sext_4x8mem_to_4x64:
770 ; ALL-NEXT: vpmovsxbq (%rdi), %ymm0
772 %a = load <4 x i8>,<4 x i8> *%i,align 1
773 %x = sext <4 x i8> %a to <4 x i64>
; Test: load <8 x i8> from %i, zext to <8 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
777 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
778 ; KNL-LABEL: zext_8x8mem_to_8x64:
780 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
781 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
782 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
783 ; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
786 ; SKX-LABEL: zext_8x8mem_to_8x64:
788 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
789 ; SKX-NEXT: vpmovw2m %xmm0, %k1
790 ; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
793 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64:
794 ; AVX512DQNOBW: # %bb.0:
795 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
796 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
797 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
798 ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
799 ; AVX512DQNOBW-NEXT: retq
800 %a = load <8 x i8>,<8 x i8> *%i,align 1
801 %x = zext <8 x i8> %a to <8 x i64>
802 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Test: load <8 x i8> from %i, sext to <8 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
806 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
807 ; KNL-LABEL: sext_8x8mem_to_8x64mask:
809 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
810 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
811 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
812 ; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
815 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
817 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
818 ; SKX-NEXT: vpmovw2m %xmm0, %k1
819 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
822 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask:
823 ; AVX512DQNOBW: # %bb.0:
824 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
825 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
826 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
827 ; AVX512DQNOBW-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
828 ; AVX512DQNOBW-NEXT: retq
829 %a = load <8 x i8>,<8 x i8> *%i,align 1
830 %x = sext <8 x i8> %a to <8 x i64>
831 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Test: unmasked load of <8 x i8> from %i followed by sext to <8 x i64>.
; Only the shared ALL prefix is checked for this function.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
835 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
836 ; ALL-LABEL: sext_8x8mem_to_8x64:
838 ; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
840 %a = load <8 x i8>,<8 x i8> *%i,align 1
841 %x = sext <8 x i8> %a to <8 x i64>
; Test: load <4 x i16> from %i, zext to <4 x i32>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
845 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
846 ; KNL-LABEL: zext_4x16mem_to_4x32:
848 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
849 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
850 ; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
851 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
852 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
853 ; KNL-NEXT: vzeroupper
856 ; AVX512DQ-LABEL: zext_4x16mem_to_4x32:
858 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
859 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
860 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
861 ; AVX512DQ-NEXT: retq
862 %a = load <4 x i16>,<4 x i16> *%i,align 1
863 %x = zext <4 x i16> %a to <4 x i32>
864 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Test: load <4 x i16> from %i, sext to <4 x i32>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
868 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
869 ; KNL-LABEL: sext_4x16mem_to_4x32mask:
871 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
872 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
873 ; KNL-NEXT: vpmovsxwd (%rdi), %xmm0
874 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
875 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
876 ; KNL-NEXT: vzeroupper
879 ; AVX512DQ-LABEL: sext_4x16mem_to_4x32mask:
881 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
882 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
883 ; AVX512DQ-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
884 ; AVX512DQ-NEXT: retq
885 %a = load <4 x i16>,<4 x i16> *%i,align 1
886 %x = sext <4 x i16> %a to <4 x i32>
887 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Test: unmasked load of <4 x i16> from %i followed by sext to <4 x i32>.
; Only the shared ALL prefix is checked for this function.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
891 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
892 ; ALL-LABEL: sext_4x16mem_to_4x32:
894 ; ALL-NEXT: vpmovsxwd (%rdi), %xmm0
896 %a = load <4 x i16>,<4 x i16> *%i,align 1
897 %x = sext <4 x i16> %a to <4 x i32>
; Test: load <8 x i16> from %i, zext to <8 x i32>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
902 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
903 ; KNL-LABEL: zext_8x16mem_to_8x32:
905 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
906 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
907 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
908 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
909 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
910 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
913 ; SKX-LABEL: zext_8x16mem_to_8x32:
915 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
916 ; SKX-NEXT: vpmovw2m %xmm0, %k1
917 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
920 ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32:
921 ; AVX512DQNOBW: # %bb.0:
922 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
923 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
924 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
925 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
926 ; AVX512DQNOBW-NEXT: retq
927 %a = load <8 x i16>,<8 x i16> *%i,align 1
928 %x = zext <8 x i16> %a to <8 x i32>
929 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Test: load <8 x i16> from %i, sext to <8 x i32>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
933 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
934 ; KNL-LABEL: sext_8x16mem_to_8x32mask:
936 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
937 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
938 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
939 ; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
940 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
941 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
944 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
946 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
947 ; SKX-NEXT: vpmovw2m %xmm0, %k1
948 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
951 ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask:
952 ; AVX512DQNOBW: # %bb.0:
953 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
954 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
955 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
956 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
957 ; AVX512DQNOBW-NEXT: retq
958 %a = load <8 x i16>,<8 x i16> *%i,align 1
959 %x = sext <8 x i16> %a to <8 x i32>
960 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Test: unmasked load of <8 x i16> from %i followed by sext to <8 x i32>.
; Only the shared ALL prefix is checked for this function.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
964 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
965 ; ALL-LABEL: sext_8x16mem_to_8x32:
967 ; ALL-NEXT: vpmovsxwd (%rdi), %ymm0
969 %a = load <8 x i16>,<8 x i16> *%i,align 1
970 %x = sext <8 x i16> %a to <8 x i32>
; Test: zext the register operand %a from <8 x i16> to <8 x i32>, then select
; zero for the lanes whose %mask bit is false.
974 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
975 ; KNL-LABEL: zext_8x16_to_8x32mask:
977 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
978 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
979 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
980 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
981 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
982 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
985 ; SKX-LABEL: zext_8x16_to_8x32mask:
987 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
988 ; SKX-NEXT: vpmovw2m %xmm1, %k1
989 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
992 ; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask:
993 ; AVX512DQNOBW: # %bb.0:
994 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
995 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
996 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
997 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
998 ; AVX512DQNOBW-NEXT: retq
999 %x = zext <8 x i16> %a to <8 x i32>
1000 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Test: unmasked zext of the register operand %a from <8 x i16> to <8 x i32>.
; Only the shared ALL prefix is checked for this function.
1004 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
1005 ; ALL-LABEL: zext_8x16_to_8x32:
1007 ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1009 %x = zext <8 x i16> %a to <8 x i32>
; Test: load <16 x i16> from %i, zext to <16 x i32>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1013 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
1014 ; KNL-LABEL: zext_16x16mem_to_16x32:
1016 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1017 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1018 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1019 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1022 ; SKX-LABEL: zext_16x16mem_to_16x32:
1024 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1025 ; SKX-NEXT: vpmovb2m %xmm0, %k1
1026 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1029 ; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32:
1030 ; AVX512DQNOBW: # %bb.0:
1031 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
1032 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1033 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
1034 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1035 ; AVX512DQNOBW-NEXT: retq
1036 %a = load <16 x i16>,<16 x i16> *%i,align 1
1037 %x = zext <16 x i16> %a to <16 x i32>
1038 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Test: load <16 x i16> from %i, sext to <16 x i32>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1042 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
1043 ; KNL-LABEL: sext_16x16mem_to_16x32mask:
1045 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1046 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1047 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1048 ; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1051 ; SKX-LABEL: sext_16x16mem_to_16x32mask:
1053 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
1054 ; SKX-NEXT: vpmovb2m %xmm0, %k1
1055 ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1058 ; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask:
1059 ; AVX512DQNOBW: # %bb.0:
1060 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0
1061 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1062 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1
1063 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
1064 ; AVX512DQNOBW-NEXT: retq
1065 %a = load <16 x i16>,<16 x i16> *%i,align 1
1066 %x = sext <16 x i16> %a to <16 x i32>
1067 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Test: unmasked load of <16 x i16> from %i followed by sext to <16 x i32>.
; Only the shared ALL prefix is checked for this function.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1071 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
1072 ; ALL-LABEL: sext_16x16mem_to_16x32:
1074 ; ALL-NEXT: vpmovsxwd (%rdi), %zmm0
1076 %a = load <16 x i16>,<16 x i16> *%i,align 1
1077 %x = sext <16 x i16> %a to <16 x i32>
; Test: zext the register operand %a from <16 x i16> to <16 x i32>, then select
; zero for the lanes whose %mask bit is false.
1080 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
1081 ; KNL-LABEL: zext_16x16_to_16x32mask:
1083 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
1084 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
1085 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
1086 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1089 ; SKX-LABEL: zext_16x16_to_16x32mask:
1091 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
1092 ; SKX-NEXT: vpmovb2m %xmm1, %k1
1093 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1096 ; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask:
1097 ; AVX512DQNOBW: # %bb.0:
1098 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1
1099 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1
1100 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1
1101 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1102 ; AVX512DQNOBW-NEXT: retq
1103 %x = zext <16 x i16> %a to <16 x i32>
1104 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Test: unmasked zext of the register operand %a from <16 x i16> to <16 x i32>.
; Only the shared ALL prefix is checked for this function.
1108 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
1109 ; ALL-LABEL: zext_16x16_to_16x32:
1111 ; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1113 %x = zext <16 x i16> %a to <16 x i32>
; Test: load <2 x i16> from %i, zext to <2 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1117 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
1118 ; KNL-LABEL: zext_2x16mem_to_2x64:
1120 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1121 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1122 ; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1123 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1124 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1125 ; KNL-NEXT: vzeroupper
1128 ; AVX512DQ-LABEL: zext_2x16mem_to_2x64:
1129 ; AVX512DQ: # %bb.0:
1130 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1131 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1132 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1133 ; AVX512DQ-NEXT: retq
1134 %a = load <2 x i16>,<2 x i16> *%i,align 1
1135 %x = zext <2 x i16> %a to <2 x i64>
1136 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Test: load <2 x i16> from %i, sext to <2 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1140 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
1141 ; KNL-LABEL: sext_2x16mem_to_2x64mask:
1143 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1144 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1145 ; KNL-NEXT: vpmovsxwq (%rdi), %xmm0
1146 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1147 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1148 ; KNL-NEXT: vzeroupper
1151 ; AVX512DQ-LABEL: sext_2x16mem_to_2x64mask:
1152 ; AVX512DQ: # %bb.0:
1153 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1154 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1155 ; AVX512DQ-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
1156 ; AVX512DQ-NEXT: retq
1157 %a = load <2 x i16>,<2 x i16> *%i,align 1
1158 %x = sext <2 x i16> %a to <2 x i64>
1159 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Test: unmasked load of <2 x i16> from %i followed by sext to <2 x i64>.
; Only the shared ALL prefix is checked for this function.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1163 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
1164 ; ALL-LABEL: sext_2x16mem_to_2x64:
1166 ; ALL-NEXT: vpmovsxwq (%rdi), %xmm0
1168 %a = load <2 x i16>,<2 x i16> *%i,align 1
1169 %x = sext <2 x i16> %a to <2 x i64>
; Test: load <4 x i16> from %i, zext to <4 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1173 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
1174 ; KNL-LABEL: zext_4x16mem_to_4x64:
1176 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1177 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1178 ; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1179 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1180 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1183 ; AVX512DQ-LABEL: zext_4x16mem_to_4x64:
1184 ; AVX512DQ: # %bb.0:
1185 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1186 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1187 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1188 ; AVX512DQ-NEXT: retq
1189 %a = load <4 x i16>,<4 x i16> *%i,align 1
1190 %x = zext <4 x i16> %a to <4 x i64>
1191 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Test: load <4 x i16> from %i, sext to <4 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1195 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
1196 ; KNL-LABEL: sext_4x16mem_to_4x64mask:
1198 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1199 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1200 ; KNL-NEXT: vpmovsxwq (%rdi), %ymm0
1201 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1202 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1205 ; AVX512DQ-LABEL: sext_4x16mem_to_4x64mask:
1206 ; AVX512DQ: # %bb.0:
1207 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1208 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1209 ; AVX512DQ-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
1210 ; AVX512DQ-NEXT: retq
1211 %a = load <4 x i16>,<4 x i16> *%i,align 1
1212 %x = sext <4 x i16> %a to <4 x i64>
1213 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Test: unmasked load of <4 x i16> from %i followed by sext to <4 x i64>.
; Only the shared ALL prefix is checked for this function.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1217 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
1218 ; ALL-LABEL: sext_4x16mem_to_4x64:
1220 ; ALL-NEXT: vpmovsxwq (%rdi), %ymm0
1222 %a = load <4 x i16>,<4 x i16> *%i,align 1
1223 %x = sext <4 x i16> %a to <4 x i64>
; Test: load <8 x i16> from %i, zext to <8 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1227 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
1228 ; KNL-LABEL: zext_8x16mem_to_8x64:
1230 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1231 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1232 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1233 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1236 ; SKX-LABEL: zext_8x16mem_to_8x64:
1238 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1239 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1240 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1243 ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64:
1244 ; AVX512DQNOBW: # %bb.0:
1245 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1246 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1247 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1248 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
1249 ; AVX512DQNOBW-NEXT: retq
1250 %a = load <8 x i16>,<8 x i16> *%i,align 1
1251 %x = zext <8 x i16> %a to <8 x i64>
1252 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Test: load <8 x i16> from %i, sext to <8 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1256 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
1257 ; KNL-LABEL: sext_8x16mem_to_8x64mask:
1259 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1260 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1261 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1262 ; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1265 ; SKX-LABEL: sext_8x16mem_to_8x64mask:
1267 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1268 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1269 ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1272 ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask:
1273 ; AVX512DQNOBW: # %bb.0:
1274 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1275 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1276 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1277 ; AVX512DQNOBW-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
1278 ; AVX512DQNOBW-NEXT: retq
1279 %a = load <8 x i16>,<8 x i16> *%i,align 1
1280 %x = sext <8 x i16> %a to <8 x i64>
1281 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Test: unmasked load of <8 x i16> from %i followed by sext to <8 x i64>.
; Only the shared ALL prefix is checked for this function.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1285 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
1286 ; ALL-LABEL: sext_8x16mem_to_8x64:
1288 ; ALL-NEXT: vpmovsxwq (%rdi), %zmm0
1290 %a = load <8 x i16>,<8 x i16> *%i,align 1
1291 %x = sext <8 x i16> %a to <8 x i64>
; Test: zext the register operand %a from <8 x i16> to <8 x i64>, then select
; zero for the lanes whose %mask bit is false.
1295 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
1296 ; KNL-LABEL: zext_8x16_to_8x64mask:
1298 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
1299 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
1300 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
1301 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1304 ; SKX-LABEL: zext_8x16_to_8x64mask:
1306 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
1307 ; SKX-NEXT: vpmovw2m %xmm1, %k1
1308 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1311 ; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask:
1312 ; AVX512DQNOBW: # %bb.0:
1313 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1314 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1315 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1316 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1317 ; AVX512DQNOBW-NEXT: retq
1318 %x = zext <8 x i16> %a to <8 x i64>
1319 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Test: unmasked zext of the register operand %a from <8 x i16> to <8 x i64>.
; Only the shared ALL prefix is checked for this function.
1323 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
1324 ; ALL-LABEL: zext_8x16_to_8x64:
1326 ; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1328 %ret = zext <8 x i16> %a to <8 x i64>
; Test: load <2 x i32> from %i, zext to <2 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1332 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
1333 ; KNL-LABEL: zext_2x32mem_to_2x64:
1335 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1336 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1337 ; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1338 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1339 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1340 ; KNL-NEXT: vzeroupper
1343 ; AVX512DQ-LABEL: zext_2x32mem_to_2x64:
1344 ; AVX512DQ: # %bb.0:
1345 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1346 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1347 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
1348 ; AVX512DQ-NEXT: retq
1349 %a = load <2 x i32>,<2 x i32> *%i,align 1
1350 %x = zext <2 x i32> %a to <2 x i64>
1351 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Test: load <2 x i32> from %i, sext to <2 x i64>, then select zero for the
; lanes whose %mask bit is false.
; NOTE(review): declared readnone yet loads through %i - confirm the attribute is intended.
1355 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
1356 ; KNL-LABEL: sext_2x32mem_to_2x64mask:
1358 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
1359 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1360 ; KNL-NEXT: vpmovsxdq (%rdi), %xmm0
1361 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1362 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1363 ; KNL-NEXT: vzeroupper
1366 ; AVX512DQ-LABEL: sext_2x32mem_to_2x64mask:
1367 ; AVX512DQ: # %bb.0:
1368 ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0
1369 ; AVX512DQ-NEXT: vpmovq2m %xmm0, %k1
1370 ; AVX512DQ-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
1371 ; AVX512DQ-NEXT: retq
1372 %a = load <2 x i32>,<2 x i32> *%i,align 1
1373 %x = sext <2 x i32> %a to <2 x i64>
1374 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Unmasked load of <2 x i32> (align 1) sign-extended to <2 x i64>.
; Every RUN configuration is expected to fold the load into vpmovsxdq.
1378 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
1379 ; ALL-LABEL: sext_2x32mem_to_2x64:
1381 ; ALL-NEXT: vpmovsxdq (%rdi), %xmm0
1383 %a = load <2 x i32>,<2 x i32> *%i,align 1
1384 %x = sext <2 x i32> %a to <2 x i64>
; Loads <4 x i32> (align 1), zero-extends to <4 x i64> and selects against zero
; under a <4 x i1> mask.
; The KNL run expects vpslld/vptestmd on zmm to form k1, an unmasked vpmovzxdq
; load into ymm0, then a zero-masked vmovdqa64 on zmm.
; The runs using the AVX512DQ prefix expect vpmovd2m and a zero-masked vpmovzxdq.
1388 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1389 ; KNL-LABEL: zext_4x32mem_to_4x64:
1391 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1392 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1393 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1394 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1395 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1398 ; AVX512DQ-LABEL: zext_4x32mem_to_4x64:
1399 ; AVX512DQ: # %bb.0:
1400 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1401 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1402 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1403 ; AVX512DQ-NEXT: retq
1404 %a = load <4 x i32>,<4 x i32> *%i,align 1
1405 %x = zext <4 x i32> %a to <4 x i64>
1406 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <4 x i32> (align 1), sign-extends to <4 x i64> and selects against zero
; under a <4 x i1> mask.
; The KNL run expects an unmasked vpmovsxdq load plus a zero-masked vmovdqa64 on
; zmm; the runs using the AVX512DQ prefix expect a zero-masked vpmovsxdq load.
1410 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
1411 ; KNL-LABEL: sext_4x32mem_to_4x64mask:
1413 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1414 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
1415 ; KNL-NEXT: vpmovsxdq (%rdi), %ymm0
1416 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1417 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1420 ; AVX512DQ-LABEL: sext_4x32mem_to_4x64mask:
1421 ; AVX512DQ: # %bb.0:
1422 ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0
1423 ; AVX512DQ-NEXT: vpmovd2m %xmm0, %k1
1424 ; AVX512DQ-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
1425 ; AVX512DQ-NEXT: retq
1426 %a = load <4 x i32>,<4 x i32> *%i,align 1
1427 %x = sext <4 x i32> %a to <4 x i64>
1428 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Unmasked load of <4 x i32> (align 1) sign-extended to <4 x i64>.
; Every RUN configuration is expected to fold the load into vpmovsxdq (ymm0).
1432 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
1433 ; ALL-LABEL: sext_4x32mem_to_4x64:
1435 ; ALL-NEXT: vpmovsxdq (%rdi), %ymm0
1437 %a = load <4 x i32>,<4 x i32> *%i,align 1
1438 %x = sext <4 x i32> %a to <4 x i64>
; Unmasked sign-extension of a register <4 x i32> to <4 x i64>.
; Every RUN configuration is expected to emit vpmovsxdq from xmm0 to ymm0.
1442 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
1443 ; ALL-LABEL: sext_4x32_to_4x64:
1445 ; ALL-NEXT: vpmovsxdq %xmm0, %ymm0
1447 %x = sext <4 x i32> %a to <4 x i64>
; Zero-extends a register <4 x i32> to <4 x i64> and selects against zero under
; a <4 x i1> mask (mask arrives in xmm1 per the expected assembly).
; The KNL run expects an unmasked vpmovzxdq followed by a zero-masked vmovdqa64
; on zmm; the runs using the AVX512DQ prefix expect a zero-masked vpmovzxdq.
1451 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
1452 ; KNL-LABEL: zext_4x32_to_4x64mask:
1454 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1
1455 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
1456 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1457 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
1458 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1461 ; AVX512DQ-LABEL: zext_4x32_to_4x64mask:
1462 ; AVX512DQ: # %bb.0:
1463 ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm1
1464 ; AVX512DQ-NEXT: vpmovd2m %xmm1, %k1
1465 ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1466 ; AVX512DQ-NEXT: retq
1467 %x = zext <4 x i32> %a to <4 x i64>
1468 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <8 x i32> (align 1), zero-extends to <8 x i64> and selects against zero
; under an <8 x i1> mask. All three expectation sets end in a zero-masked
; vpmovzxdq load into zmm0; they differ only in how k1 is formed:
;   KNL run          - vpmovsxwq to zmm, vpsllq $63, vptestmq
;   SKX run          - vpsllw $15, vpmovw2m
;   AVX512DQNOBW run - vpmovsxwd to ymm, vpslld $31, vpmovd2m
1472 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1473 ; KNL-LABEL: zext_8x32mem_to_8x64:
1475 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1476 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1477 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1478 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1481 ; SKX-LABEL: zext_8x32mem_to_8x64:
1483 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1484 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1485 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1488 ; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64:
1489 ; AVX512DQNOBW: # %bb.0:
1490 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1491 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1492 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1493 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1494 ; AVX512DQNOBW-NEXT: retq
1495 %a = load <8 x i32>,<8 x i32> *%i,align 1
1496 %x = zext <8 x i32> %a to <8 x i64>
1497 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Loads <8 x i32> (align 1), sign-extends to <8 x i64> and selects against zero
; under an <8 x i1> mask. All three expectation sets end in a zero-masked
; vpmovsxdq load from (%rdi) into zmm0; they differ only in how k1 is formed
; (vptestmq on the KNL run, vpmovw2m on the SKX run, vpmovd2m on the
; AVX512DQNOBW run).
1501 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
1502 ; KNL-LABEL: sext_8x32mem_to_8x64mask:
1504 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1505 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1506 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1507 ; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1510 ; SKX-LABEL: sext_8x32mem_to_8x64mask:
1512 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1513 ; SKX-NEXT: vpmovw2m %xmm0, %k1
1514 ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1517 ; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask:
1518 ; AVX512DQNOBW: # %bb.0:
1519 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1520 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1521 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
1522 ; AVX512DQNOBW-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
1523 ; AVX512DQNOBW-NEXT: retq
1524 %a = load <8 x i32>,<8 x i32> *%i,align 1
1525 %x = sext <8 x i32> %a to <8 x i64>
1526 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Unmasked load of <8 x i32> (align 1) sign-extended to <8 x i64>.
; Every RUN configuration is expected to fold the load into vpmovsxdq (zmm0).
1530 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
1531 ; ALL-LABEL: sext_8x32mem_to_8x64:
1533 ; ALL-NEXT: vpmovsxdq (%rdi), %zmm0
1535 %a = load <8 x i32>,<8 x i32> *%i,align 1
1536 %x = sext <8 x i32> %a to <8 x i64>
; Unmasked sign-extension of a register <8 x i32> to <8 x i64>.
; Every RUN configuration is expected to emit vpmovsxdq from ymm0 to zmm0.
1540 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
1541 ; ALL-LABEL: sext_8x32_to_8x64:
1543 ; ALL-NEXT: vpmovsxdq %ymm0, %zmm0
1545 %x = sext <8 x i32> %a to <8 x i64>
; Zero-extends a register <8 x i32> to <8 x i64> and selects against zero under
; an <8 x i1> mask (mask arrives in xmm1 per the expected assembly).
; All three expectation sets end in a zero-masked vpmovzxdq from ymm0 to zmm0;
; only the mask-to-k1 conversion differs between the runs.
1549 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
1550 ; KNL-LABEL: zext_8x32_to_8x64mask:
1552 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
1553 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
1554 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
1555 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1558 ; SKX-LABEL: zext_8x32_to_8x64mask:
1560 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
1561 ; SKX-NEXT: vpmovw2m %xmm1, %k1
1562 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1565 ; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask:
1566 ; AVX512DQNOBW: # %bb.0:
1567 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1
1568 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1
1569 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1
1570 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
1571 ; AVX512DQNOBW-NEXT: retq
1572 %x = zext <8 x i32> %a to <8 x i64>
1573 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Floating-point truncation of <8 x double> to <8 x float>.
; Every RUN configuration is expected to emit vcvtpd2ps from zmm0 to ymm0.
1576 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
1577 ; ALL-LABEL: fptrunc_test:
1579 ; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0
1581 %b = fptrunc <8 x double> %a to <8 x float>
; Floating-point extension of <8 x float> to <8 x double>.
; Every RUN configuration is expected to emit vcvtps2pd from ymm0 to zmm0.
1585 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
1586 ; ALL-LABEL: fpext_test:
1588 ; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
1590 %b = fpext <8 x float> %a to <8 x double>
; Reinterprets an i16 as <16 x i1> and zero-extends it to <16 x i32>.
; The KNL run expects a zero-masked vpternlogd $255 under k1 followed by
; vpsrld $31; the SKX and AVX512DQNOBW runs expect vpmovm2d followed by
; vpsrld $31, differing only in kmovd versus kmovw for the GPR-to-k move.
1594 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
1595 ; KNL-LABEL: zext_16i1_to_16xi32:
1597 ; KNL-NEXT: kmovw %edi, %k1
1598 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1599 ; KNL-NEXT: vpsrld $31, %zmm0, %zmm0
1602 ; SKX-LABEL: zext_16i1_to_16xi32:
1604 ; SKX-NEXT: kmovd %edi, %k0
1605 ; SKX-NEXT: vpmovm2d %k0, %zmm0
1606 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0
1609 ; AVX512DQNOBW-LABEL: zext_16i1_to_16xi32:
1610 ; AVX512DQNOBW: # %bb.0:
1611 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1612 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %zmm0
1613 ; AVX512DQNOBW-NEXT: vpsrld $31, %zmm0, %zmm0
1614 ; AVX512DQNOBW-NEXT: retq
1615 %a = bitcast i16 %b to <16 x i1>
1616 %c = zext <16 x i1> %a to <16 x i32>
; Reinterprets an i8 as <8 x i1> and zero-extends it to <8 x i64>.
; The KNL run expects a zero-masked vpternlogq $255 under k1 followed by
; vpsrlq $63; the SKX and AVX512DQNOBW runs expect vpmovm2q followed by
; vpsrlq $63, differing only in kmovd versus kmovw for the GPR-to-k move.
1620 define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
1621 ; KNL-LABEL: zext_8i1_to_8xi64:
1623 ; KNL-NEXT: kmovw %edi, %k1
1624 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1625 ; KNL-NEXT: vpsrlq $63, %zmm0, %zmm0
1628 ; SKX-LABEL: zext_8i1_to_8xi64:
1630 ; SKX-NEXT: kmovd %edi, %k0
1631 ; SKX-NEXT: vpmovm2q %k0, %zmm0
1632 ; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0
1635 ; AVX512DQNOBW-LABEL: zext_8i1_to_8xi64:
1636 ; AVX512DQNOBW: # %bb.0:
1637 ; AVX512DQNOBW-NEXT: kmovw %edi, %k0
1638 ; AVX512DQNOBW-NEXT: vpmovm2q %k0, %zmm0
1639 ; AVX512DQNOBW-NEXT: vpsrlq $63, %zmm0, %zmm0
1640 ; AVX512DQNOBW-NEXT: retq
1641 %a = bitcast i8 %b to <8 x i1>
1642 %c = zext <8 x i1> %a to <8 x i64>
; Truncates <16 x i8> to <16 x i1> and bitcasts the result to i16.
; Every RUN configuration is expected to shift bit 0 of each byte into the sign
; position (vpsllw $7) and collect the bits with vpmovmskb.
1646 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
1647 ; ALL-LABEL: trunc_16i8_to_16i1:
1649 ; ALL-NEXT: vpsllw $7, %xmm0, %xmm0
1650 ; ALL-NEXT: vpmovmskb %xmm0, %eax
1651 ; ALL-NEXT: # kill: def $ax killed $ax killed $eax
1653 %mask_b = trunc <16 x i8>%a to <16 x i1>
1654 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates <16 x i32> to <16 x i1> and bitcasts the result to i16.
; All runs expect vpslld $31 first; the KNL run then expects vptestmd, while the
; SKX and AVX512DQNOBW runs expect vpmovd2m (with kmovd and kmovw respectively
; for the k-to-GPR move).
1658 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
1659 ; KNL-LABEL: trunc_16i32_to_16i1:
1661 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1662 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1663 ; KNL-NEXT: kmovw %k0, %eax
1664 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
1665 ; KNL-NEXT: vzeroupper
1668 ; SKX-LABEL: trunc_16i32_to_16i1:
1670 ; SKX-NEXT: vpslld $31, %zmm0, %zmm0
1671 ; SKX-NEXT: vpmovd2m %zmm0, %k0
1672 ; SKX-NEXT: kmovd %k0, %eax
1673 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
1674 ; SKX-NEXT: vzeroupper
1677 ; AVX512DQNOBW-LABEL: trunc_16i32_to_16i1:
1678 ; AVX512DQNOBW: # %bb.0:
1679 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
1680 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0
1681 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1682 ; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
1683 ; AVX512DQNOBW-NEXT: vzeroupper
1684 ; AVX512DQNOBW-NEXT: retq
1685 %mask_b = trunc <16 x i32>%a to <16 x i1>
1686 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates two <4 x i32> inputs to <4 x i1>, ANDs them, and sign-extends the
; result back to <4 x i32>.
; Every RUN configuration is expected to stay in xmm registers: vpand, then
; vpslld $31 / vpsrad $31 to replicate bit 0 across each lane.
1690 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
1691 ; ALL-LABEL: trunc_4i32_to_4i1:
1693 ; ALL-NEXT: vpand %xmm1, %xmm0, %xmm0
1694 ; ALL-NEXT: vpslld $31, %xmm0, %xmm0
1695 ; ALL-NEXT: vpsrad $31, %xmm0, %xmm0
1697 %mask_a = trunc <4 x i32>%a to <4 x i1>
1698 %mask_b = trunc <4 x i32>%b to <4 x i1>
1699 %a_and_b = and <4 x i1>%mask_a, %mask_b
1700 %res = sext <4 x i1>%a_and_b to <4 x i32>
; Truncates <8 x i16> to <8 x i1> and bitcasts the result to i8.
;   KNL run          - widen with vpmovsxwq to zmm, vpsllq $63, vptestmq
;   SKX run          - vpsllw $15, vpmovw2m directly on xmm
;   AVX512DQNOBW run - widen with vpmovsxwd to ymm, vpslld $31, vpmovd2m
1705 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
1706 ; KNL-LABEL: trunc_8i16_to_8i1:
1708 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
1709 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
1710 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1711 ; KNL-NEXT: kmovw %k0, %eax
1712 ; KNL-NEXT: # kill: def $al killed $al killed $eax
1713 ; KNL-NEXT: vzeroupper
1716 ; SKX-LABEL: trunc_8i16_to_8i1:
1718 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
1719 ; SKX-NEXT: vpmovw2m %xmm0, %k0
1720 ; SKX-NEXT: kmovd %k0, %eax
1721 ; SKX-NEXT: # kill: def $al killed $al killed $eax
1724 ; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1:
1725 ; AVX512DQNOBW: # %bb.0:
1726 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
1727 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
1728 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0
1729 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1730 ; AVX512DQNOBW-NEXT: # kill: def $al killed $al killed $eax
1731 ; AVX512DQNOBW-NEXT: vzeroupper
1732 ; AVX512DQNOBW-NEXT: retq
1733 %mask_b = trunc <8 x i16>%a to <8 x i1>
1734 %mask = bitcast <8 x i1> %mask_b to i8
; Compares two <8 x i32> with slt, inverts the <8 x i1> result (xor with all
; true) and sign-extends it to <8 x i32>.
; All runs expect vpcmpgtd on ymm followed by vpternlogq $15 for the inversion;
; the KNL run expects the vpternlogq on zmm, the runs using the AVX512DQ prefix
; expect it on ymm.
1738 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1739 ; KNL-LABEL: sext_8i1_8i32:
1741 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1742 ; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
1743 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1746 ; AVX512DQ-LABEL: sext_8i1_8i32:
1747 ; AVX512DQ: # %bb.0:
1748 ; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1749 ; AVX512DQ-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0
1750 ; AVX512DQ-NEXT: retq
1751 %x = icmp slt <8 x i32> %a1, %a2
1752 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
1753 %y = sext <8 x i1> %x1 to <8 x i32>
; Truncates an i32 to i1, inserts it as element 0 of a constant <16 x i1>
; (element 1 false, all others true) and bitcasts the vector to i16.
; All runs expect the same shape: materialize the constant with movw $-4 into a
; k register, kshiftrw/kshiftlw by 1, mask the scalar with andl $1, move it to
; k1 and merge with korw. The SKX run differs from the other two only in using
; kmovd for the moves through %eax.
1758 define i16 @trunc_i32_to_i1(i32 %a) {
1759 ; KNL-LABEL: trunc_i32_to_i1:
1761 ; KNL-NEXT: movw $-4, %ax
1762 ; KNL-NEXT: kmovw %eax, %k0
1763 ; KNL-NEXT: kshiftrw $1, %k0, %k0
1764 ; KNL-NEXT: kshiftlw $1, %k0, %k0
1765 ; KNL-NEXT: andl $1, %edi
1766 ; KNL-NEXT: kmovw %edi, %k1
1767 ; KNL-NEXT: korw %k1, %k0, %k0
1768 ; KNL-NEXT: kmovw %k0, %eax
1769 ; KNL-NEXT: # kill: def $ax killed $ax killed $eax
1772 ; SKX-LABEL: trunc_i32_to_i1:
1774 ; SKX-NEXT: movw $-4, %ax
1775 ; SKX-NEXT: kmovd %eax, %k0
1776 ; SKX-NEXT: kshiftrw $1, %k0, %k0
1777 ; SKX-NEXT: kshiftlw $1, %k0, %k0
1778 ; SKX-NEXT: andl $1, %edi
1779 ; SKX-NEXT: kmovw %edi, %k1
1780 ; SKX-NEXT: korw %k1, %k0, %k0
1781 ; SKX-NEXT: kmovd %k0, %eax
1782 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
1785 ; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
1786 ; AVX512DQNOBW: # %bb.0:
1787 ; AVX512DQNOBW-NEXT: movw $-4, %ax
1788 ; AVX512DQNOBW-NEXT: kmovw %eax, %k0
1789 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
1790 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
1791 ; AVX512DQNOBW-NEXT: andl $1, %edi
1792 ; AVX512DQNOBW-NEXT: kmovw %edi, %k1
1793 ; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
1794 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax
1795 ; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
1796 ; AVX512DQNOBW-NEXT: retq
1797 %a_i = trunc i32 %a to i1
1798 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
1799 %res = bitcast <16 x i1> %maskv to i16
; Compares two <8 x i32> with slt and sign-extends the <8 x i1> result to
; <8 x i16>.
;   KNL run          - vector vpcmpgtd on ymm, then vpmovdw narrowing from zmm
;   SKX run          - vpcmpgtd into k0, then vpmovm2w
;   AVX512DQNOBW run - vpcmpgtd into k0, vpmovm2d to ymm, then vpmovdw
1803 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1804 ; KNL-LABEL: sext_8i1_8i16:
1806 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1807 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
1808 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1809 ; KNL-NEXT: vzeroupper
1812 ; SKX-LABEL: sext_8i1_8i16:
1814 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1815 ; SKX-NEXT: vpmovm2w %k0, %xmm0
1816 ; SKX-NEXT: vzeroupper
1819 ; AVX512DQNOBW-LABEL: sext_8i1_8i16:
1820 ; AVX512DQNOBW: # %bb.0:
1821 ; AVX512DQNOBW-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1822 ; AVX512DQNOBW-NEXT: vpmovm2d %k0, %ymm0
1823 ; AVX512DQNOBW-NEXT: vpmovdw %ymm0, %xmm0
1824 ; AVX512DQNOBW-NEXT: vzeroupper
1825 ; AVX512DQNOBW-NEXT: retq
1826 %x = icmp slt <8 x i32> %a1, %a2
1827 %y = sext <8 x i1> %x to <8 x i16>
; Compares two <16 x i32> with slt and sign-extends the <16 x i1> result to
; <16 x i32>.
; The KNL run expects vpcmpgtd into k1 and a zero-masked vpternlogd $255; the
; runs using the AVX512DQ prefix expect vpcmpgtd into k0 and vpmovm2d.
1831 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
1832 ; KNL-LABEL: sext_16i1_16i32:
1834 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1835 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1838 ; AVX512DQ-LABEL: sext_16i1_16i32:
1839 ; AVX512DQ: # %bb.0:
1840 ; AVX512DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
1841 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1842 ; AVX512DQ-NEXT: retq
1843 %x = icmp slt <16 x i32> %a1, %a2
1844 %y = sext <16 x i1> %x to <16 x i32>
; Compares two <8 x i32> with slt and sign-extends the <8 x i1> result to
; <8 x i64>.
; The KNL run expects a vector vpcmpgtd on ymm widened with vpmovsxdq; the runs
; using the AVX512DQ prefix expect vpcmpgtd into k0 and vpmovm2q.
1848 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
1849 ; KNL-LABEL: sext_8i1_8i64:
1851 ; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
1852 ; KNL-NEXT: vpmovsxdq %ymm0, %zmm0
1855 ; AVX512DQ-LABEL: sext_8i1_8i64:
1856 ; AVX512DQ: # %bb.0:
1857 ; AVX512DQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
1858 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
1859 ; AVX512DQ-NEXT: retq
1860 %x = icmp slt <8 x i32> %a1, %a2
1861 %y = sext <8 x i1> %x to <8 x i64>
; Loads <8 x i8>, sign-extends to <8 x i64> and stores the result through %res.
; Every RUN configuration is expected to fold the load into vpmovsxbq and store
; zmm0 with vmovdqa64, followed by vzeroupper.
1865 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
1866 ; ALL-LABEL: extload_v8i64:
1868 ; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
1869 ; ALL-NEXT: vmovdqa64 %zmm0, (%rsi)
1870 ; ALL-NEXT: vzeroupper
1872 %sign_load = load <8 x i8>, <8 x i8>* %a
1873 %c = sext <8 x i8> %sign_load to <8 x i64>
1874 store <8 x i64> %c, <8 x i64>* %res
; Selects <64 x i16> %x against zero under a <64 x i1> mask.
; The KNL and AVX512DQNOBW runs expect identical code: the value is handled as
; four ymm pieces, each mask piece is widened with vpmovzxbw, turned into a
; per-lane all-ones/zero word with vpsllw/vpsraw $15, and applied with vpand.
; The SKX run expects vpsllw $7 + vpmovb2m into k1 and two zero-masked vmovdqu16
; on zmm, with kshiftrq $32 selecting the upper half of the mask.
1878 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
1879 ; KNL-LABEL: test21:
1881 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
1882 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
1883 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
1884 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
1885 ; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
1886 ; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
1887 ; KNL-NEXT: vpand %ymm0, %ymm4, %ymm0
1888 ; KNL-NEXT: vpsllw $15, %ymm5, %ymm4
1889 ; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
1890 ; KNL-NEXT: vpand %ymm1, %ymm4, %ymm1
1891 ; KNL-NEXT: vpsllw $15, %ymm6, %ymm4
1892 ; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
1893 ; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2
1894 ; KNL-NEXT: vpsllw $15, %ymm7, %ymm4
1895 ; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
1896 ; KNL-NEXT: vpand %ymm3, %ymm4, %ymm3
1899 ; SKX-LABEL: test21:
1901 ; SKX-NEXT: vpsllw $7, %zmm2, %zmm2
1902 ; SKX-NEXT: vpmovb2m %zmm2, %k1
1903 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
1904 ; SKX-NEXT: kshiftrq $32, %k1, %k1
1905 ; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z}
1908 ; AVX512DQNOBW-LABEL: test21:
1909 ; AVX512DQNOBW: # %bb.0:
1910 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
1911 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
1912 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
1913 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
1914 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm4, %ymm4
1915 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm4, %ymm4
1916 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm4, %ymm0
1917 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm5, %ymm4
1918 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm4, %ymm4
1919 ; AVX512DQNOBW-NEXT: vpand %ymm1, %ymm4, %ymm1
1920 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm6, %ymm4
1921 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm4, %ymm4
1922 ; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm4, %ymm2
1923 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm7, %ymm4
1924 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm4, %ymm4
1925 ; AVX512DQNOBW-NEXT: vpand %ymm3, %ymm4, %ymm3
1926 ; AVX512DQNOBW-NEXT: retq
1927 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
; Zero-extension of <16 x i8> to <16 x i16> written as a shufflevector that
; interleaves each byte of %a with a zero byte, followed by a bitcast.
; Every RUN configuration is expected to recognize the pattern as vpmovzxbw.
1931 define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
1932 ; ALL-LABEL: shuffle_zext_16x8_to_16x16:
1934 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1936 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
1937 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Same shuffle-with-zero + bitcast zero-extension of <16 x i8> to <16 x i16>,
; then selected against zero under a <16 x i1> mask.
; The KNL and AVX512DQNOBW runs expect an unmasked vpmovzxbw of both the mask and
; the data, vpsllw/vpsraw $15 on the mask, and vpand. The SKX run expects
; vpsllw $7 + vpmovb2m into k1 and a single zero-masked vpmovzxbw.
1941 define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
1942 ; KNL-LABEL: shuffle_zext_16x8_to_16x16_mask:
1944 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1945 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1946 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
1947 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
1948 ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
1951 ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
1953 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
1954 ; SKX-NEXT: vpmovb2m %xmm1, %k1
1955 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1958 ; AVX512DQNOBW-LABEL: shuffle_zext_16x8_to_16x16_mask:
1959 ; AVX512DQNOBW: # %bb.0:
1960 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1961 ; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1962 ; AVX512DQNOBW-NEXT: vpsllw $15, %ymm1, %ymm1
1963 ; AVX512DQNOBW-NEXT: vpsraw $15, %ymm1, %ymm1
1964 ; AVX512DQNOBW-NEXT: vpand %ymm0, %ymm1, %ymm0
1965 ; AVX512DQNOBW-NEXT: retq
1966 %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
1967 %bc = bitcast <32 x i8> %x to <16 x i16>
1968 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
; Interleaves the low 16 bytes of a <32 x i8> with zero bytes via shufflevector
; and bitcasts the result to <16 x i16>.
; Every RUN configuration is expected to select vpmovzxbw from xmm0 to ymm0.
1972 define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
1973 ; ALL-LABEL: zext_32x8_to_16x16:
1975 ; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1977 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
1978 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Places each of the low 8 bytes of a <32 x i8> at the start of a 4-byte group
; padded with zeros via shufflevector, then bitcasts to <8 x i32>.
; Every RUN configuration is expected to select vpmovzxbd from xmm0 to ymm0.
1982 define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
1983 ; ALL-LABEL: zext_32x8_to_8x32:
1985 ; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1987 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
1988 %2 = bitcast <32 x i8> %1 to <8 x i32>
; Places each of the low 4 bytes of a <32 x i8> at the start of an 8-byte group
; padded with zeros via shufflevector, then bitcasts to <4 x i64>.
; Every RUN configuration is expected to select vpmovzxbq from xmm0 to ymm0.
1992 define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
1993 ; ALL-LABEL: zext_32x8_to_4x64:
1995 ; ALL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
1997 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
1998 %2 = bitcast <32 x i8> %1 to <4 x i64>
; Interleaves the low 8 words of a <16 x i16> with zero words via shufflevector
; and bitcasts the result to <8 x i32>.
; Every RUN configuration is expected to select vpmovzxwd from xmm0 to ymm0.
2002 define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
2003 ; ALL-LABEL: zext_16x16_to_8x32:
2005 ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2007 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
2008 %2 = bitcast <16 x i16> %1 to <8 x i32>
; Places each of the low 4 words of a <16 x i16> at the start of a 4-word group
; padded with zeros via shufflevector, then bitcasts to <4 x i64>.
; Every RUN configuration is expected to select vpmovzxwq from xmm0 to ymm0.
2012 define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
2013 ; ALL-LABEL: zext_16x16_to_4x64:
2015 ; ALL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2017 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
2018 %2 = bitcast <16 x i16> %1 to <4 x i64>
; Interleaves the low 4 dwords of an <8 x i32> with zero dwords via
; shufflevector and bitcasts the result to <4 x i64>.
; Every RUN configuration is expected to select vpmovzxdq from xmm0 to ymm0.
2022 define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
2023 ; ALL-LABEL: zext_8x32_to_4x64:
2025 ; ALL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2027 %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
2028 %2 = bitcast <8 x i32> %1 to <4 x i64>
; Compares two <64 x i8> for equality and zero-extends the <64 x i1> result to
; <64 x i8>.
; The KNL and AVX512DQNOBW runs expect two ymm halves, each vpcmpeqb followed by
; vpand with a vector of byte 1s. The SKX run expects vpcmpeqb on zmm into k1
; and a zero-masked vmovdqu8 load of a RIP-relative constant.
2032 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
2033 ; KNL-LABEL: zext_64xi1_to_64xi8:
2035 ; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
2036 ; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2037 ; KNL-NEXT: vpand %ymm2, %ymm0, %ymm0
2038 ; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
2039 ; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
2042 ; SKX-LABEL: zext_64xi1_to_64xi8:
2044 ; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
2045 ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
2048 ; AVX512DQNOBW-LABEL: zext_64xi1_to_64xi8:
2049 ; AVX512DQNOBW: # %bb.0:
2050 ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
2051 ; AVX512DQNOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2052 ; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm0, %ymm0
2053 ; AVX512DQNOBW-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
2054 ; AVX512DQNOBW-NEXT: vpand %ymm2, %ymm1, %ymm1
2055 ; AVX512DQNOBW-NEXT: retq
2056 %mask = icmp eq <64 x i8> %x, %y
2057 %1 = zext <64 x i1> %mask to <64 x i8>
; Compares two <32 x i16> for equality and zero-extends the <32 x i1> result to
; <32 x i16>.
; The KNL and AVX512DQNOBW runs expect two ymm halves, each vpcmpeqw followed by
; vpsrlw $15. The SKX run expects vpcmpeqw on zmm into k0, vpmovm2w, and a
; single vpsrlw $15 on zmm.
2061 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
2062 ; KNL-LABEL: zext_32xi1_to_32xi16:
2064 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
2065 ; KNL-NEXT: vpsrlw $15, %ymm0, %ymm0
2066 ; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
2067 ; KNL-NEXT: vpsrlw $15, %ymm1, %ymm1
2070 ; SKX-LABEL: zext_32xi1_to_32xi16:
2072 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
2073 ; SKX-NEXT: vpmovm2w %k0, %zmm0
2074 ; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0
2077 ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi16:
2078 ; AVX512DQNOBW: # %bb.0:
2079 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
2080 ; AVX512DQNOBW-NEXT: vpsrlw $15, %ymm0, %ymm0
2081 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
2082 ; AVX512DQNOBW-NEXT: vpsrlw $15, %ymm1, %ymm1
2083 ; AVX512DQNOBW-NEXT: retq
2084 %mask = icmp eq <32 x i16> %x, %y
2085 %1 = zext <32 x i1> %mask to <32 x i16>
; Compares two <16 x i16> for equality and zero-extends the <16 x i1> result to
; <16 x i16>.
; Every RUN configuration is expected to emit vpcmpeqw on ymm followed by
; vpsrlw $15, without going through a k register.
2089 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
2090 ; ALL-LABEL: zext_16xi1_to_16xi16:
2092 ; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
2093 ; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0
2095 %mask = icmp eq <16 x i16> %x, %y
2096 %1 = zext <16 x i1> %mask to <16 x i16>
; Compares two <32 x i16> for equality and zero-extends the <32 x i1> result to
; <32 x i8>.
; The KNL and AVX512DQNOBW runs expect each ymm half to be compared with
; vpcmpeqw, widened with vpmovzxwd to zmm and narrowed with vpmovdb, then the
; halves joined with vinserti128 and masked with vpand against a RIP-relative
; constant. The SKX run expects vpcmpeqw on zmm into k1 and a zero-masked
; vmovdqu8 load of a RIP-relative constant into ymm0.
2101 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
2102 ; KNL-LABEL: zext_32xi1_to_32xi8:
2104 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
2105 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
2106 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
2107 ; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
2108 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
2109 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
2110 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2111 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
2114 ; SKX-LABEL: zext_32xi1_to_32xi8:
2116 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
2117 ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
2120 ; AVX512DQNOBW-LABEL: zext_32xi1_to_32xi8:
2121 ; AVX512DQNOBW: # %bb.0:
2122 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
2123 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
2124 ; AVX512DQNOBW-NEXT: vpmovdb %zmm0, %xmm0
2125 ; AVX512DQNOBW-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
2126 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
2127 ; AVX512DQNOBW-NEXT: vpmovdb %zmm1, %xmm1
2128 ; AVX512DQNOBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2129 ; AVX512DQNOBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
2130 ; AVX512DQNOBW-NEXT: retq
2131 %mask = icmp eq <32 x i16> %x, %y
2132 %1 = zext <32 x i1> %mask to <32 x i8>
; Compares two <4 x i8> for equality and zero-extends the <4 x i1> result to
; <4 x i32>.
; Every RUN configuration is expected to mask both operands with a broadcast
; 255 in 32-bit lanes, compare with vpcmpeqd, and reduce to 0/1 with vpsrld $31.
2136 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
2137 ; ALL-LABEL: zext_4xi1_to_4x32:
2139 ; ALL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255]
2140 ; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
2141 ; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0
2142 ; ALL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2143 ; ALL-NEXT: vpsrld $31, %xmm0, %xmm0
2145 %mask = icmp eq <4 x i8> %x, %y
2146 %1 = zext <4 x i1> %mask to <4 x i32>
2150 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
2151 ; ALL-LABEL: zext_2xi1_to_2xi64:
2153 ; ALL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [255,255]
2154 ; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
2155 ; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0
2156 ; ALL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
2157 ; ALL-NEXT: vpsrlq $63, %xmm0, %xmm0
2159 %mask = icmp eq <2 x i8> %x, %y
2160 %1 = zext <2 x i1> %mask to <2 x i64>