1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=CHECK
; Masked two-source shuffle, passthru form (<4 x i32>).
; The shuffle takes elements 1-4 of the concatenation of %a and %b, i.e.
; a[1..3],b[0]. The select keeps the shuffled value in each lane whose entry in
; lanes 0-3 of %mask (bitcast to <8 x i1>) is true and %passthru otherwise.
; The assertions expect the select folded into a {%k1}-masked valignd.
4 define <4 x i32> @mask_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) {
5 ; CHECK-LABEL: mask_shuffle_v4i32_1234:
7 ; CHECK-NEXT: kmovd %edi, %k1
8 ; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[1,2,3],xmm1[0]
9 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
11 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
12 %mask.cast = bitcast i8 %mask to <8 x i1>
13 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
; Masked two-source shuffle, zeroing form (<4 x i32>).
; Same shuffle as the passthru variant (a[1..3],b[0]), but unselected lanes are
; taken from zeroinitializer. The mask is lanes 0-3 of %mask bitcast to
; <8 x i1>. The assertions expect a single {%k1} {z} valignd.
18 define <4 x i32> @maskz_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
19 ; CHECK-LABEL: maskz_shuffle_v4i32_1234:
21 ; CHECK-NEXT: kmovd %edi, %k1
22 ; CHECK-NEXT: valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3],xmm1[0]
24 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
25 %mask.cast = bitcast i8 %mask to <8 x i1>
26 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
27 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
; Masked two-source shuffle, passthru form (<4 x i32>).
; The shuffle takes elements 2-5 of the concatenation of %a and %b, i.e.
; a[2,3],b[0,1]. Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the
; shuffled value and %passthru. The assertions expect a {%k1}-masked valignd.
31 define <4 x i32> @mask_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) {
32 ; CHECK-LABEL: mask_shuffle_v4i32_2345:
34 ; CHECK-NEXT: kmovd %edi, %k1
35 ; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[2,3],xmm1[0,1]
36 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
38 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
39 %mask.cast = bitcast i8 %mask to <8 x i1>
40 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
41 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
; Masked two-source shuffle, zeroing form (<4 x i32>).
; The shuffle takes a[2,3],b[0,1]; lanes not selected by lanes 0-3 of %mask
; (bitcast to <8 x i1>) become zero. The assertions expect a single
; {%k1} {z} valignd.
45 define <4 x i32> @maskz_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
46 ; CHECK-LABEL: maskz_shuffle_v4i32_2345:
48 ; CHECK-NEXT: kmovd %edi, %k1
49 ; CHECK-NEXT: valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3],xmm1[0,1]
51 %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
52 %mask.cast = bitcast i8 %mask to <8 x i1>
53 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
54 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
; Masked two-source shuffle, passthru form (<2 x i64>).
; The shuffle takes a[1],b[0]. Lanes 0-1 of %mask (bitcast to <8 x i1>) choose
; between the shuffled value and %passthru. The assertions expect a
; {%k1}-masked valignq.
58 define <2 x i64> @mask_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passthru, i8 %mask) {
59 ; CHECK-LABEL: mask_shuffle_v2i64_12:
61 ; CHECK-NEXT: kmovd %edi, %k1
62 ; CHECK-NEXT: valignq {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
63 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
65 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
66 %mask.cast = bitcast i8 %mask to <8 x i1>
67 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
68 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
; Masked two-source shuffle, zeroing form (<2 x i64>).
; The shuffle takes a[1],b[0]; lanes not selected by lanes 0-1 of %mask
; (bitcast to <8 x i1>) become zero. The assertions expect a single
; {%k1} {z} valignq.
72 define <2 x i64> @maskz_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
73 ; CHECK-LABEL: maskz_shuffle_v2i64_12:
75 ; CHECK-NEXT: kmovd %edi, %k1
76 ; CHECK-NEXT: valignq {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
78 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
79 %mask.cast = bitcast i8 %mask to <8 x i1>
80 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
81 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
; Masked two-source shuffle, passthru form (<4 x i64>, 256-bit).
; The shuffle takes a[1..3],b[0]. Lanes 0-3 of %mask (bitcast to <8 x i1>)
; choose between the shuffled value and %passthru. The assertions expect a
; {%k1}-masked valignq on ymm registers.
85 define <4 x i64> @mask_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passthru, i8 %mask) {
86 ; CHECK-LABEL: mask_shuffle_v4i64_1234:
88 ; CHECK-NEXT: kmovd %edi, %k1
89 ; CHECK-NEXT: valignq {{.*#+}} ymm2 {%k1} = ymm0[1,2,3],ymm1[0]
90 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
92 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
93 %mask.cast = bitcast i8 %mask to <8 x i1>
94 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
95 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
; Masked two-source shuffle, zeroing form (<4 x i64>, 256-bit).
; The shuffle takes a[1..3],b[0]; lanes not selected by lanes 0-3 of %mask
; (bitcast to <8 x i1>) become zero. The assertions expect a single
; {%k1} {z} valignq on ymm registers.
99 define <4 x i64> @maskz_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
100 ; CHECK-LABEL: maskz_shuffle_v4i64_1234:
102 ; CHECK-NEXT: kmovd %edi, %k1
103 ; CHECK-NEXT: valignq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3],ymm1[0]
105 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
106 %mask.cast = bitcast i8 %mask to <8 x i1>
107 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
108 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
; Masked single-source shuffle, passthru form (<4 x i64>).
; The shuffle reorders %a as a[1,2,3,0] (second operand is undef). Lanes 0-3 of
; %mask (bitcast to <8 x i1>) choose between the shuffled value and %passthru.
; The assertions expect a {%k1}-masked vpermq rather than valignq.
112 define <4 x i64> @mask_shuffle_v4i64_1230(<4 x i64> %a, <4 x i64> %passthru, i8 %mask) {
113 ; CHECK-LABEL: mask_shuffle_v4i64_1230:
115 ; CHECK-NEXT: kmovd %edi, %k1
116 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,0]
117 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
119 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
120 %mask.cast = bitcast i8 %mask to <8 x i1>
121 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
122 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
; Masked single-source shuffle, zeroing form (<4 x i64>).
; The shuffle reorders %a as a[1,2,3,0]; lanes not selected by lanes 0-3 of
; %mask (bitcast to <8 x i1>) become zero. The assertions expect a single
; {%k1} {z} vpermq.
126 define <4 x i64> @maskz_shuffle_v4i64_1230(<4 x i64> %a, i8 %mask) {
127 ; CHECK-LABEL: maskz_shuffle_v4i64_1230:
129 ; CHECK-NEXT: kmovd %edi, %k1
130 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,0]
132 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
133 %mask.cast = bitcast i8 %mask to <8 x i1>
134 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
135 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
; Masked two-source shuffle, passthru form (<8 x i32>, 256-bit).
; The shuffle takes a[1..7],b[0]. All eight lanes of %mask (bitcast to
; <8 x i1>) are used directly as the select condition against %passthru.
; The assertions expect a {%k1}-masked valignd on ymm registers.
139 define <8 x i32> @mask_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) {
140 ; CHECK-LABEL: mask_shuffle_v8i32_12345678:
142 ; CHECK-NEXT: kmovd %edi, %k1
143 ; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[1,2,3,4,5,6,7],ymm1[0]
144 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
146 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
147 %mask.cast = bitcast i8 %mask to <8 x i1>
148 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
; Masked two-source shuffle, zeroing form (<8 x i32>, 256-bit).
; The shuffle takes a[1..7],b[0]; lanes whose entry in %mask (bitcast to
; <8 x i1>) is false become zero. The assertions expect a single
; {%k1} {z} valignd on ymm registers.
152 define <8 x i32> @maskz_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
153 ; CHECK-LABEL: maskz_shuffle_v8i32_12345678:
155 ; CHECK-NEXT: kmovd %edi, %k1
156 ; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7],ymm1[0]
158 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
159 %mask.cast = bitcast i8 %mask to <8 x i1>
160 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
; Masked two-source shuffle, passthru form (<8 x i32>, 256-bit).
; The shuffle takes a[2..7],b[0,1]. All eight lanes of %mask (bitcast to
; <8 x i1>) choose between the shuffled value and %passthru. The assertions
; expect a {%k1}-masked valignd on ymm registers.
164 define <8 x i32> @mask_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) {
165 ; CHECK-LABEL: mask_shuffle_v8i32_23456789:
167 ; CHECK-NEXT: kmovd %edi, %k1
168 ; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[2,3,4,5,6,7],ymm1[0,1]
169 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
171 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
172 %mask.cast = bitcast i8 %mask to <8 x i1>
173 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
; Masked two-source shuffle, zeroing form (<8 x i32>, 256-bit).
; The shuffle takes a[2..7],b[0,1]; lanes whose entry in %mask (bitcast to
; <8 x i1>) is false become zero. The assertions expect a single
; {%k1} {z} valignd on ymm registers.
177 define <8 x i32> @maskz_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
178 ; CHECK-LABEL: maskz_shuffle_v8i32_23456789:
180 ; CHECK-NEXT: kmovd %edi, %k1
181 ; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,4,5,6,7],ymm1[0,1]
183 %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
184 %mask.cast = bitcast i8 %mask to <8 x i1>
185 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
; Masked single-source rotate, passthru form (<8 x i32>).
; The shuffle reorders %a as a[1,2,3,4,5,6,7,0] (second operand is undef). All
; eight lanes of %mask (bitcast to <8 x i1>) choose between the shuffled value
; and %passthru. The assertions expect a {%k1}-masked valignd with ymm0 as the
; only source.
189 define <8 x i32> @mask_shuffle_v8i32_12345670(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) {
190 ; CHECK-LABEL: mask_shuffle_v8i32_12345670:
192 ; CHECK-NEXT: kmovd %edi, %k1
193 ; CHECK-NEXT: valignd {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,4,5,6,7,0]
194 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
196 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
197 %mask.cast = bitcast i8 %mask to <8 x i1>
198 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
; Masked single-source rotate, zeroing form (<8 x i32>).
; The shuffle reorders %a as a[1,2,3,4,5,6,7,0]; lanes whose entry in %mask
; (bitcast to <8 x i1>) is false become zero. The assertions expect a single
; {%k1} {z} valignd.
202 define <8 x i32> @maskz_shuffle_v8i32_12345670(<8 x i32> %a, i8 %mask) {
203 ; CHECK-LABEL: maskz_shuffle_v8i32_12345670:
205 ; CHECK-NEXT: kmovd %edi, %k1
206 ; CHECK-NEXT: valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7,0]
208 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
209 %mask.cast = bitcast i8 %mask to <8 x i1>
210 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
; Masked single-source rotate by two i32 elements, passthru form (<8 x i32>).
; The shuffle reorders %a as a[2,3,4,5,6,7,0,1]; all eight lanes of %mask
; (bitcast to <8 x i1>) choose between the shuffled value and %passthru.
; Per the assertions the mask is NOT folded into the shuffle here: an unmasked
; vpermq is followed by a separate {%k1}-masked vpblendmd.
; NOTE(review): presumably the rotate is matched as a 64-bit-element permute,
; whose element width does not match the 8 x i32 mask - confirm against the
; X86 shuffle lowering before relying on this explanation.
214 define <8 x i32> @mask_shuffle_v8i32_23456701(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) {
215 ; CHECK-LABEL: mask_shuffle_v8i32_23456701:
217 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
218 ; CHECK-NEXT: kmovd %edi, %k1
219 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
221 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
222 %mask.cast = bitcast i8 %mask to <8 x i1>
223 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
; Masked single-source rotate by two i32 elements, zeroing form (<8 x i32>).
; The shuffle reorders %a as a[2,3,4,5,6,7,0,1]; lanes whose entry in %mask
; (bitcast to <8 x i1>) is false become zero.
; Per the assertions the mask is NOT folded into the shuffle here: an unmasked
; vpermq is followed by a separate {%k1} {z} vmovdqa32.
; NOTE(review): presumably the rotate is matched as a 64-bit-element permute,
; whose element width does not match the 8 x i32 mask - confirm.
227 define <8 x i32> @maskz_shuffle_v8i32_23456701(<8 x i32> %a, i8 %mask) {
228 ; CHECK-LABEL: maskz_shuffle_v8i32_23456701:
230 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
231 ; CHECK-NEXT: kmovd %edi, %k1
232 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
234 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
235 %mask.cast = bitcast i8 %mask to <8 x i1>
236 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
; Masked subvector extract, passthru form: elements 0-3 of a <8 x i32>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect no extract instruction for
; index 0, only a {%k1}-masked vpblendmd on xmm registers.
240 define <4 x i32> @mask_extract_v8i32_v4i32_0(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) {
241 ; CHECK-LABEL: mask_extract_v8i32_v4i32_0:
243 ; CHECK-NEXT: kmovd %edi, %k1
244 ; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
245 ; CHECK-NEXT: vzeroupper
247 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
248 %mask.cast = bitcast i8 %mask to <8 x i1>
249 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
250 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
; Masked subvector extract, zeroing form: elements 0-3 of a <8 x i32>.
; Lanes not selected by lanes 0-3 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect no extract instruction for index 0, only a
; {%k1} {z} vmovdqa32 on xmm0.
254 define <4 x i32> @mask_extract_v8i32_v4i32_0_z(<8 x i32> %a, i8 %mask) {
255 ; CHECK-LABEL: mask_extract_v8i32_v4i32_0_z:
257 ; CHECK-NEXT: kmovd %edi, %k1
258 ; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
259 ; CHECK-NEXT: vzeroupper
261 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
262 %mask.cast = bitcast i8 %mask to <8 x i1>
263 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
264 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
; Masked subvector extract, passthru form: elements 4-7 of a <8 x i32>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextracti32x4 $1.
268 define <4 x i32> @mask_extract_v8i32_v4i32_1(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) {
269 ; CHECK-LABEL: mask_extract_v8i32_v4i32_1:
271 ; CHECK-NEXT: kmovd %edi, %k1
272 ; CHECK-NEXT: vextracti32x4 $1, %ymm0, %xmm1 {%k1}
273 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
274 ; CHECK-NEXT: vzeroupper
276 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
277 %mask.cast = bitcast i8 %mask to <8 x i1>
278 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
279 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
; Masked subvector extract, zeroing form: elements 4-7 of a <8 x i32>.
; Lanes not selected by lanes 0-3 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect a single {%k1} {z} vextracti32x4 $1.
283 define <4 x i32> @mask_extract_v8i32_v4i32_1_z(<8 x i32> %a, i8 %mask) {
284 ; CHECK-LABEL: mask_extract_v8i32_v4i32_1_z:
286 ; CHECK-NEXT: kmovd %edi, %k1
287 ; CHECK-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
288 ; CHECK-NEXT: vzeroupper
290 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
291 %mask.cast = bitcast i8 %mask to <8 x i1>
292 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
293 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
; Masked subvector extract, passthru form: elements 0-3 of a <8 x float>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect no extract instruction for
; index 0, only a {%k1}-masked vblendmps on xmm registers.
297 define <4 x float> @mask_extract_v8f32_v4f32_0(<8 x float> %a, <4 x float> %passthru, i8 %mask) {
298 ; CHECK-LABEL: mask_extract_v8f32_v4f32_0:
300 ; CHECK-NEXT: kmovd %edi, %k1
301 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
302 ; CHECK-NEXT: vzeroupper
304 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
305 %mask.cast = bitcast i8 %mask to <8 x i1>
306 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
307 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
; Masked subvector extract, zeroing form: elements 0-3 of a <8 x float>.
; Lanes not selected by lanes 0-3 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect no extract instruction for index 0, only a
; {%k1} {z} vmovaps on xmm0.
311 define <4 x float> @mask_extract_v8f32_v4f32_0_z(<8 x float> %a, i8 %mask) {
312 ; CHECK-LABEL: mask_extract_v8f32_v4f32_0_z:
314 ; CHECK-NEXT: kmovd %edi, %k1
315 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
316 ; CHECK-NEXT: vzeroupper
318 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
319 %mask.cast = bitcast i8 %mask to <8 x i1>
320 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
321 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
; Masked subvector extract, passthru form: elements 4-7 of a <8 x float>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextractf32x4 $1.
325 define <4 x float> @mask_extract_v8f32_v4f32_1(<8 x float> %a, <4 x float> %passthru, i8 %mask) {
326 ; CHECK-LABEL: mask_extract_v8f32_v4f32_1:
328 ; CHECK-NEXT: kmovd %edi, %k1
329 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1}
330 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
331 ; CHECK-NEXT: vzeroupper
333 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
334 %mask.cast = bitcast i8 %mask to <8 x i1>
335 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
336 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
; Masked subvector extract, zeroing form: elements 4-7 of a <8 x float>.
; Lanes not selected by lanes 0-3 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect a single {%k1} {z} vextractf32x4 $1.
340 define <4 x float> @mask_extract_v8f32_v4f32_1_z(<8 x float> %a, i8 %mask) {
341 ; CHECK-LABEL: mask_extract_v8f32_v4f32_1_z:
343 ; CHECK-NEXT: kmovd %edi, %k1
344 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
345 ; CHECK-NEXT: vzeroupper
347 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
348 %mask.cast = bitcast i8 %mask to <8 x i1>
349 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
350 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
; Masked subvector extract, passthru form: elements 0-1 of a <4 x i64>.
; Lanes 0-1 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect no extract instruction for
; index 0, only a {%k1}-masked vpblendmq on xmm registers.
354 define <2 x i64> @mask_extract_v4i64_v2i64_0(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) {
355 ; CHECK-LABEL: mask_extract_v4i64_v2i64_0:
357 ; CHECK-NEXT: kmovd %edi, %k1
358 ; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
359 ; CHECK-NEXT: vzeroupper
361 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
362 %mask.cast = bitcast i8 %mask to <8 x i1>
363 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
364 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
; Masked subvector extract, zeroing form: elements 0-1 of a <4 x i64>.
; Lanes not selected by lanes 0-1 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect no extract instruction for index 0, only a
; {%k1} {z} vmovdqa64 on xmm0.
368 define <2 x i64> @mask_extract_v4i64_v2i64_0_z(<4 x i64> %a, i8 %mask) {
369 ; CHECK-LABEL: mask_extract_v4i64_v2i64_0_z:
371 ; CHECK-NEXT: kmovd %edi, %k1
372 ; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
373 ; CHECK-NEXT: vzeroupper
375 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
376 %mask.cast = bitcast i8 %mask to <8 x i1>
377 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
378 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
; Masked subvector extract, passthru form: elements 2-3 of a <4 x i64>.
; Lanes 0-1 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextracti64x2 $1.
382 define <2 x i64> @mask_extract_v4i64_v2i64_1(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) {
383 ; CHECK-LABEL: mask_extract_v4i64_v2i64_1:
385 ; CHECK-NEXT: kmovd %edi, %k1
386 ; CHECK-NEXT: vextracti64x2 $1, %ymm0, %xmm1 {%k1}
387 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
388 ; CHECK-NEXT: vzeroupper
390 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
391 %mask.cast = bitcast i8 %mask to <8 x i1>
392 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
393 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
; Masked subvector extract, zeroing form: elements 2-3 of a <4 x i64>.
; Lanes not selected by lanes 0-1 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect a single {%k1} {z} vextracti64x2 $1.
397 define <2 x i64> @mask_extract_v4i64_v2i64_1_z(<4 x i64> %a, i8 %mask) {
398 ; CHECK-LABEL: mask_extract_v4i64_v2i64_1_z:
400 ; CHECK-NEXT: kmovd %edi, %k1
401 ; CHECK-NEXT: vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
402 ; CHECK-NEXT: vzeroupper
404 %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
405 %mask.cast = bitcast i8 %mask to <8 x i1>
406 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
407 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
; Masked subvector extract, passthru form: elements 0-1 of a <4 x double>.
; Lanes 0-1 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect no extract instruction for
; index 0, only a {%k1}-masked vblendmpd on xmm registers.
411 define <2 x double> @mask_extract_v4f64_v2f64_0(<4 x double> %a, <2 x double> %passthru, i8 %mask) {
412 ; CHECK-LABEL: mask_extract_v4f64_v2f64_0:
414 ; CHECK-NEXT: kmovd %edi, %k1
415 ; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
416 ; CHECK-NEXT: vzeroupper
418 %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
419 %mask.cast = bitcast i8 %mask to <8 x i1>
420 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
421 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
422 ret <2 x double> %res
; Masked subvector extract, zeroing form: elements 0-1 of a <4 x double>.
; Lanes not selected by lanes 0-1 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect no extract instruction for index 0, only a
; {%k1} {z} vmovapd on xmm0.
425 define <2 x double> @mask_extract_v4f64_v2f64_0_z(<4 x double> %a, i8 %mask) {
426 ; CHECK-LABEL: mask_extract_v4f64_v2f64_0_z:
428 ; CHECK-NEXT: kmovd %edi, %k1
429 ; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
430 ; CHECK-NEXT: vzeroupper
432 %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
433 %mask.cast = bitcast i8 %mask to <8 x i1>
434 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
435 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
436 ret <2 x double> %res
; Masked subvector extract, passthru form: elements 2-3 of a <4 x double>.
; Lanes 0-1 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextractf64x2 $1.
439 define <2 x double> @mask_extract_v4f64_v2f64_1(<4 x double> %a, <2 x double> %passthru, i8 %mask) {
440 ; CHECK-LABEL: mask_extract_v4f64_v2f64_1:
442 ; CHECK-NEXT: kmovd %edi, %k1
443 ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1}
444 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
445 ; CHECK-NEXT: vzeroupper
447 %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
448 %mask.cast = bitcast i8 %mask to <8 x i1>
449 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
450 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
451 ret <2 x double> %res
; Masked subvector extract, zeroing form: elements 2-3 of a <4 x double>.
; Lanes not selected by lanes 0-1 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect a single {%k1} {z} vextractf64x2 $1.
454 define <2 x double> @mask_extract_v4f64_v2f64_1_z(<4 x double> %a, i8 %mask) {
455 ; CHECK-LABEL: mask_extract_v4f64_v2f64_1_z:
457 ; CHECK-NEXT: kmovd %edi, %k1
458 ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
459 ; CHECK-NEXT: vzeroupper
461 %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
462 %mask.cast = bitcast i8 %mask to <8 x i1>
463 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
464 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
465 ret <2 x double> %res
; Masked subvector extract, passthru form: elements 0-3 of a <16 x i32>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect no extract instruction for
; index 0, only a {%k1}-masked vpblendmd on xmm registers.
468 define <4 x i32> @mask_extract_v16i32_v4i32_0(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
469 ; CHECK-LABEL: mask_extract_v16i32_v4i32_0:
471 ; CHECK-NEXT: kmovd %edi, %k1
472 ; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
473 ; CHECK-NEXT: vzeroupper
475 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
476 %mask.cast = bitcast i8 %mask to <8 x i1>
477 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
478 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
; Masked subvector extract, zeroing form: elements 0-3 of a <16 x i32>.
; Lanes not selected by lanes 0-3 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect no extract instruction for index 0, only a
; {%k1} {z} vmovdqa32 on xmm0.
482 define <4 x i32> @mask_extract_v16i32_v4i32_0_z(<16 x i32> %a, i8 %mask) {
483 ; CHECK-LABEL: mask_extract_v16i32_v4i32_0_z:
485 ; CHECK-NEXT: kmovd %edi, %k1
486 ; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
487 ; CHECK-NEXT: vzeroupper
489 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
490 %mask.cast = bitcast i8 %mask to <8 x i1>
491 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
492 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
; Masked subvector extract, passthru form: elements 4-7 of a <16 x i32>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextracti32x4 $1 from zmm0.
496 define <4 x i32> @mask_extract_v16i32_v4i32_1(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
497 ; CHECK-LABEL: mask_extract_v16i32_v4i32_1:
499 ; CHECK-NEXT: kmovd %edi, %k1
500 ; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm1 {%k1}
501 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
502 ; CHECK-NEXT: vzeroupper
504 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
505 %mask.cast = bitcast i8 %mask to <8 x i1>
506 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
507 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
; Masked subvector extract, zeroing form: elements 4-7 of a <16 x i32>.
; Lanes not selected by lanes 0-3 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect a single {%k1} {z} vextracti32x4 $1 from zmm0.
511 define <4 x i32> @mask_extract_v16i32_v4i32_1_z(<16 x i32> %a, i8 %mask) {
512 ; CHECK-LABEL: mask_extract_v16i32_v4i32_1_z:
514 ; CHECK-NEXT: kmovd %edi, %k1
515 ; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z}
516 ; CHECK-NEXT: vzeroupper
518 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
519 %mask.cast = bitcast i8 %mask to <8 x i1>
520 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
521 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
; Masked subvector extract, passthru form: elements 8-11 of a <16 x i32>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextracti32x4 $2 from zmm0.
525 define <4 x i32> @mask_extract_v16i32_v4i32_2(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
526 ; CHECK-LABEL: mask_extract_v16i32_v4i32_2:
528 ; CHECK-NEXT: kmovd %edi, %k1
529 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1 {%k1}
530 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
531 ; CHECK-NEXT: vzeroupper
533 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
534 %mask.cast = bitcast i8 %mask to <8 x i1>
535 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
536 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
; Masked subvector extract, passthru form: elements 12-15 of a <16 x i32>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextracti32x4 $3 from zmm0.
540 define <4 x i32> @mask_extract_v16i32_v4i32_3(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
541 ; CHECK-LABEL: mask_extract_v16i32_v4i32_3:
543 ; CHECK-NEXT: kmovd %edi, %k1
544 ; CHECK-NEXT: vextracti32x4 $3, %zmm0, %xmm1 {%k1}
545 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
546 ; CHECK-NEXT: vzeroupper
548 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
549 %mask.cast = bitcast i8 %mask to <8 x i1>
550 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
551 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
; Masked subvector extract, passthru form: elements 0-3 of a <16 x float>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect no extract instruction for
; index 0, only a {%k1}-masked vblendmps on xmm registers.
555 define <4 x float> @mask_extract_v16f32_v4f32_0(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
556 ; CHECK-LABEL: mask_extract_v16f32_v4f32_0:
558 ; CHECK-NEXT: kmovd %edi, %k1
559 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
560 ; CHECK-NEXT: vzeroupper
562 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
563 %mask.cast = bitcast i8 %mask to <8 x i1>
564 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
565 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
; Masked subvector extract, zeroing form: elements 0-3 of a <16 x float>.
; Lanes not selected by lanes 0-3 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect no extract instruction for index 0, only a
; {%k1} {z} vmovaps on xmm0.
569 define <4 x float> @mask_extract_v16f32_v4f32_0_z(<16 x float> %a, i8 %mask) {
570 ; CHECK-LABEL: mask_extract_v16f32_v4f32_0_z:
572 ; CHECK-NEXT: kmovd %edi, %k1
573 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
574 ; CHECK-NEXT: vzeroupper
576 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
577 %mask.cast = bitcast i8 %mask to <8 x i1>
578 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
579 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
; Masked subvector extract, passthru form: elements 4-7 of a <16 x float>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextractf32x4 $1 from zmm0.
583 define <4 x float> @mask_extract_v16f32_v4f32_1(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
584 ; CHECK-LABEL: mask_extract_v16f32_v4f32_1:
586 ; CHECK-NEXT: kmovd %edi, %k1
587 ; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm1 {%k1}
588 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
589 ; CHECK-NEXT: vzeroupper
591 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
592 %mask.cast = bitcast i8 %mask to <8 x i1>
593 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
594 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
; Masked subvector extract, zeroing form: elements 4-7 of a <16 x float>.
; Lanes not selected by lanes 0-3 of %mask (bitcast to <8 x i1>) become zero.
; The assertions expect a single {%k1} {z} vextractf32x4 $1 from zmm0.
598 define <4 x float> @mask_extract_v16f32_v4f32_1_z(<16 x float> %a, i8 %mask) {
599 ; CHECK-LABEL: mask_extract_v16f32_v4f32_1_z:
601 ; CHECK-NEXT: kmovd %edi, %k1
602 ; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
603 ; CHECK-NEXT: vzeroupper
605 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
606 %mask.cast = bitcast i8 %mask to <8 x i1>
607 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
608 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
; Masked subvector extract, passthru form: elements 8-11 of a <16 x float>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextractf32x4 $2 from zmm0.
612 define <4 x float> @mask_extract_v16f32_v4f32_2(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
613 ; CHECK-LABEL: mask_extract_v16f32_v4f32_2:
615 ; CHECK-NEXT: kmovd %edi, %k1
616 ; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1 {%k1}
617 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
618 ; CHECK-NEXT: vzeroupper
620 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
621 %mask.cast = bitcast i8 %mask to <8 x i1>
622 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
623 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
; Masked subvector extract, passthru form: elements 12-15 of a <16 x float>.
; Lanes 0-3 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextractf32x4 $3 from zmm0.
627 define <4 x float> @mask_extract_v16f32_v4f32_3(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
628 ; CHECK-LABEL: mask_extract_v16f32_v4f32_3:
630 ; CHECK-NEXT: kmovd %edi, %k1
631 ; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm1 {%k1}
632 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
633 ; CHECK-NEXT: vzeroupper
635 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
636 %mask.cast = bitcast i8 %mask to <8 x i1>
637 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
638 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
; Masked subvector extract, passthru form: elements 0-7 of a <16 x i32>.
; All eight lanes of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect no extract instruction for
; index 0, only a {%k1}-masked vpblendmd on ymm registers.
642 define <8 x i32> @mask_extract_v16i32_v8i32_0(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) {
643 ; CHECK-LABEL: mask_extract_v16i32_v8i32_0:
645 ; CHECK-NEXT: kmovd %edi, %k1
646 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
648 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
649 %mask.cast = bitcast i8 %mask to <8 x i1>
650 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
; Masked subvector extract, zeroing form: elements 0-7 of a <16 x i32>.
; Lanes whose entry in %mask (bitcast to <8 x i1>) is false become zero.
; The assertions expect no extract instruction for index 0, only a
; {%k1} {z} vmovdqa32 on ymm0.
654 define <8 x i32> @mask_extract_v16i32_v8i32_0_z(<16 x i32> %a, i8 %mask) {
655 ; CHECK-LABEL: mask_extract_v16i32_v8i32_0_z:
657 ; CHECK-NEXT: kmovd %edi, %k1
658 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
660 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
661 %mask.cast = bitcast i8 %mask to <8 x i1>
662 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
; Masked subvector extract, passthru form: elements 8-15 of a <16 x i32>.
; All eight lanes of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextracti32x8 $1 from zmm0.
666 define <8 x i32> @mask_extract_v16i32_v8i32_1(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) {
667 ; CHECK-LABEL: mask_extract_v16i32_v8i32_1:
669 ; CHECK-NEXT: kmovd %edi, %k1
670 ; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm1 {%k1}
671 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
673 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
674 %mask.cast = bitcast i8 %mask to <8 x i1>
675 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
; Masked subvector extract, zeroing form: elements 8-15 of a <16 x i32>.
; Lanes whose entry in %mask (bitcast to <8 x i1>) is false become zero.
; The assertions expect a single {%k1} {z} vextracti32x8 $1 from zmm0.
679 define <8 x i32> @mask_extract_v16i32_v8i32_1_z(<16 x i32> %a, i8 %mask) {
680 ; CHECK-LABEL: mask_extract_v16i32_v8i32_1_z:
682 ; CHECK-NEXT: kmovd %edi, %k1
683 ; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z}
685 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
686 %mask.cast = bitcast i8 %mask to <8 x i1>
687 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
; Masked subvector extract, passthru form: elements 0-7 of a <16 x float>.
; All eight lanes of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect no extract instruction for
; index 0, only a {%k1}-masked vblendmps on ymm registers.
691 define <8 x float> @mask_extract_v16f32_v8f32_0(<16 x float> %a, <8 x float> %passthru, i8 %mask) {
692 ; CHECK-LABEL: mask_extract_v16f32_v8f32_0:
694 ; CHECK-NEXT: kmovd %edi, %k1
695 ; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
697 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
698 %mask.cast = bitcast i8 %mask to <8 x i1>
699 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> %passthru
; Masked subvector extract, zeroing form: elements 0-7 of a <16 x float>.
; Lanes whose entry in %mask (bitcast to <8 x i1>) is false become zero.
; The assertions expect no extract instruction for index 0, only a
; {%k1} {z} vmovaps on ymm0.
703 define <8 x float> @mask_extract_v16f32_v8f32_0_z(<16 x float> %a, i8 %mask) {
704 ; CHECK-LABEL: mask_extract_v16f32_v8f32_0_z:
706 ; CHECK-NEXT: kmovd %edi, %k1
707 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
709 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
710 %mask.cast = bitcast i8 %mask to <8 x i1>
711 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> zeroinitializer
; Masked subvector extract, passthru form: elements 8-15 of a <16 x float>.
; All eight lanes of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect the select folded into a
; {%k1}-masked vextractf32x8 $1 from zmm0.
715 define <8 x float> @mask_extract_v16f32_v8f32_1(<16 x float> %a, <8 x float> %passthru, i8 %mask) {
716 ; CHECK-LABEL: mask_extract_v16f32_v8f32_1:
718 ; CHECK-NEXT: kmovd %edi, %k1
719 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1}
720 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
722 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
723 %mask.cast = bitcast i8 %mask to <8 x i1>
724 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> %passthru
; Masked subvector extract, zeroing form: elements 8-15 of a <16 x float>.
; Lanes whose entry in %mask (bitcast to <8 x i1>) is false become zero.
; The assertions expect a single {%k1} {z} vextractf32x8 $1 from zmm0.
728 define <8 x float> @mask_extract_v16f32_v8f32_1_z(<16 x float> %a, i8 %mask) {
729 ; CHECK-LABEL: mask_extract_v16f32_v8f32_1_z:
731 ; CHECK-NEXT: kmovd %edi, %k1
732 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
734 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
735 %mask.cast = bitcast i8 %mask to <8 x i1>
736 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> zeroinitializer
; Masked subvector extract, passthru form: elements 0-1 of a <8 x i64>.
; Lanes 0-1 of %mask (bitcast to <8 x i1>) choose between the extracted
; elements and %passthru. The assertions expect no extract instruction for
; index 0, only a {%k1}-masked vpblendmq on xmm registers.
740 define <2 x i64> @mask_extract_v8i64_v2i64_0(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) {
741 ; CHECK-LABEL: mask_extract_v8i64_v2i64_0:
743 ; CHECK-NEXT: kmovd %edi, %k1
744 ; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
745 ; CHECK-NEXT: vzeroupper
747 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
748 %mask.cast = bitcast i8 %mask to <8 x i1>
749 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
750 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
; Zero-masked extract of elements 0-1 of an <8 x i64>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes become zero.
; The CHECK lines expect a zero-masked vmovdqa64 on xmm0 and vzeroupper.
754 define <2 x i64> @mask_extract_v8i64_v2i64_0_z(<8 x i64> %a, i8 %mask) {
755 ; CHECK-LABEL: mask_extract_v8i64_v2i64_0_z:
757 ; CHECK-NEXT: kmovd %edi, %k1
758 ; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
759 ; CHECK-NEXT: vzeroupper
761 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
762 %mask.cast = bitcast i8 %mask to <8 x i1>
763 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
764 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
; Merge-masked extract of elements 2-3 of an <8 x i64>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect a masked vextracti64x2 $1 writing xmm1, a copy of
; xmm1 into xmm0, and vzeroupper.
768 define <2 x i64> @mask_extract_v8i64_v2i64_1(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) {
769 ; CHECK-LABEL: mask_extract_v8i64_v2i64_1:
771 ; CHECK-NEXT: kmovd %edi, %k1
772 ; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm1 {%k1}
773 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
774 ; CHECK-NEXT: vzeroupper
776 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
777 %mask.cast = bitcast i8 %mask to <8 x i1>
778 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
779 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
; Zero-masked extract of elements 2-3 of an <8 x i64>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes become zero.
; The CHECK lines expect vextracti64x2 $1 with {%k1} {z} and vzeroupper.
783 define <2 x i64> @mask_extract_v8i64_v2i64_1_z(<8 x i64> %a, i8 %mask) {
784 ; CHECK-LABEL: mask_extract_v8i64_v2i64_1_z:
786 ; CHECK-NEXT: kmovd %edi, %k1
787 ; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z}
788 ; CHECK-NEXT: vzeroupper
790 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
791 %mask.cast = bitcast i8 %mask to <8 x i1>
792 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
793 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
; Merge-masked extract of elements 4-5 of an <8 x i64>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect a masked vextracti64x2 $2 writing xmm1, a copy of
; xmm1 into xmm0, and vzeroupper.
797 define <2 x i64> @mask_extract_v8i64_v2i64_2(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) {
798 ; CHECK-LABEL: mask_extract_v8i64_v2i64_2:
800 ; CHECK-NEXT: kmovd %edi, %k1
801 ; CHECK-NEXT: vextracti64x2 $2, %zmm0, %xmm1 {%k1}
802 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
803 ; CHECK-NEXT: vzeroupper
805 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
806 %mask.cast = bitcast i8 %mask to <8 x i1>
807 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
808 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
; Merge-masked extract of elements 6-7 of an <8 x i64>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect a masked vextracti64x2 $3 writing xmm1, a copy of
; xmm1 into xmm0, and vzeroupper.
812 define <2 x i64> @mask_extract_v8i64_v2i64_3(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) {
813 ; CHECK-LABEL: mask_extract_v8i64_v2i64_3:
815 ; CHECK-NEXT: kmovd %edi, %k1
816 ; CHECK-NEXT: vextracti64x2 $3, %zmm0, %xmm1 {%k1}
817 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
818 ; CHECK-NEXT: vzeroupper
820 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
821 %mask.cast = bitcast i8 %mask to <8 x i1>
822 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
823 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
; Merge-masked extract of elements 0-1 of an <8 x double>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect no extract instruction, just a masked vblendmpd on
; xmm registers followed by vzeroupper.
827 define <2 x double> @mask_extract_v8f64_v2f64_0(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
828 ; CHECK-LABEL: mask_extract_v8f64_v2f64_0:
830 ; CHECK-NEXT: kmovd %edi, %k1
831 ; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
832 ; CHECK-NEXT: vzeroupper
834 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
835 %mask.cast = bitcast i8 %mask to <8 x i1>
836 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
837 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
838 ret <2 x double> %res
; Zero-masked extract of elements 0-1 of an <8 x double>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes become zero.
; The CHECK lines expect a zero-masked vmovapd on xmm0 and vzeroupper.
841 define <2 x double> @mask_extract_v8f64_v2f64_0_z(<8 x double> %a, i8 %mask) {
842 ; CHECK-LABEL: mask_extract_v8f64_v2f64_0_z:
844 ; CHECK-NEXT: kmovd %edi, %k1
845 ; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
846 ; CHECK-NEXT: vzeroupper
848 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
849 %mask.cast = bitcast i8 %mask to <8 x i1>
850 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
851 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
852 ret <2 x double> %res
; Merge-masked extract of elements 2-3 of an <8 x double>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect a masked vextractf64x2 $1 writing xmm1, a copy of
; xmm1 into xmm0, and vzeroupper.
855 define <2 x double> @mask_extract_v8f64_v2f64_1(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
856 ; CHECK-LABEL: mask_extract_v8f64_v2f64_1:
858 ; CHECK-NEXT: kmovd %edi, %k1
859 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1}
860 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
861 ; CHECK-NEXT: vzeroupper
863 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
864 %mask.cast = bitcast i8 %mask to <8 x i1>
865 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
866 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
867 ret <2 x double> %res
; Zero-masked extract of elements 2-3 of an <8 x double>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes become zero.
; The CHECK lines expect vextractf64x2 $1 with {%k1} {z} and vzeroupper.
870 define <2 x double> @mask_extract_v8f64_v2f64_1_z(<8 x double> %a, i8 %mask) {
871 ; CHECK-LABEL: mask_extract_v8f64_v2f64_1_z:
873 ; CHECK-NEXT: kmovd %edi, %k1
874 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z}
875 ; CHECK-NEXT: vzeroupper
877 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
878 %mask.cast = bitcast i8 %mask to <8 x i1>
879 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
880 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
881 ret <2 x double> %res
; Merge-masked extract of elements 4-5 of an <8 x double>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect a masked vextractf64x2 $2 writing xmm1, a copy of
; xmm1 into xmm0, and vzeroupper.
884 define <2 x double> @mask_extract_v8f64_v2f64_2(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
885 ; CHECK-LABEL: mask_extract_v8f64_v2f64_2:
887 ; CHECK-NEXT: kmovd %edi, %k1
888 ; CHECK-NEXT: vextractf64x2 $2, %zmm0, %xmm1 {%k1}
889 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
890 ; CHECK-NEXT: vzeroupper
892 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 4, i32 5>
893 %mask.cast = bitcast i8 %mask to <8 x i1>
894 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
895 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
896 ret <2 x double> %res
; Merge-masked extract of elements 6-7 of an <8 x double>.
; Only mask bits 0-1 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect a masked vextractf64x2 $3 writing xmm1, a copy of
; xmm1 into xmm0, and vzeroupper.
899 define <2 x double> @mask_extract_v8f64_v2f64_3(<8 x double> %a, <2 x double> %passthru, i8 %mask) {
900 ; CHECK-LABEL: mask_extract_v8f64_v2f64_3:
902 ; CHECK-NEXT: kmovd %edi, %k1
903 ; CHECK-NEXT: vextractf64x2 $3, %zmm0, %xmm1 {%k1}
904 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
905 ; CHECK-NEXT: vzeroupper
907 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 6, i32 7>
908 %mask.cast = bitcast i8 %mask to <8 x i1>
909 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
910 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
911 ret <2 x double> %res
; Merge-masked extract of elements 0-3 of an <8 x i64>.
; Only mask bits 0-3 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect no extract instruction, just a masked vpblendmq on
; ymm registers.
914 define <4 x i64> @mask_extract_v8i64_v4i64_0(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) {
915 ; CHECK-LABEL: mask_extract_v8i64_v4i64_0:
917 ; CHECK-NEXT: kmovd %edi, %k1
918 ; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
920 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
921 %mask.cast = bitcast i8 %mask to <8 x i1>
922 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
923 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
; Zero-masked extract of elements 0-3 of an <8 x i64>.
; Only mask bits 0-3 are used (%mask.extract); clear lanes become zero.
; The CHECK lines expect a zero-masked vmovdqa64 on ymm0.
927 define <4 x i64> @mask_extract_v8i64_v4i64_0_z(<8 x i64> %a, i8 %mask) {
928 ; CHECK-LABEL: mask_extract_v8i64_v4i64_0_z:
930 ; CHECK-NEXT: kmovd %edi, %k1
931 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
933 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
934 %mask.cast = bitcast i8 %mask to <8 x i1>
935 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
936 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
; Merge-masked extract of elements 4-7 of an <8 x i64>.
; Only mask bits 0-3 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect a masked vextracti64x4 $1 writing ymm1, then a copy
; of ymm1 into ymm0.
940 define <4 x i64> @mask_extract_v8i64_v4i64_1(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) {
941 ; CHECK-LABEL: mask_extract_v8i64_v4i64_1:
943 ; CHECK-NEXT: kmovd %edi, %k1
944 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 {%k1}
945 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
947 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
948 %mask.cast = bitcast i8 %mask to <8 x i1>
949 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
950 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
; Zero-masked extract of elements 4-7 of an <8 x i64>.
; Only mask bits 0-3 are used (%mask.extract); clear lanes become zero.
; The CHECK lines expect a single vextracti64x4 $1 with {%k1} {z}.
954 define <4 x i64> @mask_extract_v8i64_v4i64_1_z(<8 x i64> %a, i8 %mask) {
955 ; CHECK-LABEL: mask_extract_v8i64_v4i64_1_z:
957 ; CHECK-NEXT: kmovd %edi, %k1
958 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z}
960 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
961 %mask.cast = bitcast i8 %mask to <8 x i1>
962 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
963 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
; Merge-masked extract of elements 0-3 of an <8 x double>.
; Only mask bits 0-3 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect no extract instruction, just a masked vblendmpd on
; ymm registers.
967 define <4 x double> @mask_extract_v8f64_v4f64_0(<8 x double> %a, <4 x double> %passthru, i8 %mask) {
968 ; CHECK-LABEL: mask_extract_v8f64_v4f64_0:
970 ; CHECK-NEXT: kmovd %edi, %k1
971 ; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
973 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
974 %mask.cast = bitcast i8 %mask to <8 x i1>
975 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
976 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> %passthru
977 ret <4 x double> %res
; Zero-masked extract of elements 0-3 of an <8 x double>.
; Only mask bits 0-3 are used (%mask.extract); clear lanes become zero.
; The CHECK lines expect a zero-masked vmovapd on ymm0.
980 define <4 x double> @mask_extract_v8f64_v4f64_0_z(<8 x double> %a, i8 %mask) {
981 ; CHECK-LABEL: mask_extract_v8f64_v4f64_0_z:
983 ; CHECK-NEXT: kmovd %edi, %k1
984 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
986 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
987 %mask.cast = bitcast i8 %mask to <8 x i1>
988 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
989 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> zeroinitializer
990 ret <4 x double> %res
; Merge-masked extract of elements 4-7 of an <8 x double>.
; Only mask bits 0-3 are used (%mask.extract); clear lanes take %passthru.
; The CHECK lines expect a masked vextractf64x4 $1 writing ymm1, then a copy
; of ymm1 into ymm0.
993 define <4 x double> @mask_extract_v8f64_v4f64_1(<8 x double> %a, <4 x double> %passthru, i8 %mask) {
994 ; CHECK-LABEL: mask_extract_v8f64_v4f64_1:
996 ; CHECK-NEXT: kmovd %edi, %k1
997 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 {%k1}
998 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1000 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1001 %mask.cast = bitcast i8 %mask to <8 x i1>
1002 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1003 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> %passthru
1004 ret <4 x double> %res
; Zero-masked extract of elements 4-7 of an <8 x double>.
; Only mask bits 0-3 are used (%mask.extract); clear lanes become zero.
; The CHECK lines expect a single vextractf64x4 $1 with {%k1} {z}.
1007 define <4 x double> @mask_extract_v8f64_v4f64_1_z(<8 x double> %a, i8 %mask) {
1008 ; CHECK-LABEL: mask_extract_v8f64_v4f64_1_z:
1010 ; CHECK-NEXT: kmovd %edi, %k1
1011 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
1013 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1014 %mask.cast = bitcast i8 %mask to <8 x i1>
1015 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1016 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> zeroinitializer
1017 ret <4 x double> %res
; Extracts elements 0-3 of an <8 x i64>, bitcasts them to <8 x i32>, and
; merges with %passthru under all 8 mask bits (masking is per i32 lane).
; The CHECK lines expect no extract instruction, just a dword-granular
; masked vpblendmd on ymm registers.
1020 define <8 x i32> @mask_cast_extract_v8i64_v8i32_0(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) {
1021 ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0:
1023 ; CHECK-NEXT: kmovd %edi, %k1
1024 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
1026 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1027 %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32>
1028 %mask.cast = bitcast i8 %mask to <8 x i1>
1029 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> %passthru
; Extracts elements 0-3 of an <8 x i64>, bitcasts them to <8 x i32>, and
; zeroes every i32 lane whose bit in the 8-bit mask is clear.
; The CHECK lines expect a zero-masked vmovdqa32 on ymm0.
1033 define <8 x i32> @mask_cast_extract_v8i64_v8i32_0_z(<8 x i64> %a, i8 %mask) {
1034 ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0_z:
1036 ; CHECK-NEXT: kmovd %edi, %k1
1037 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
1039 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1040 %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32>
1041 %mask.cast = bitcast i8 %mask to <8 x i1>
1042 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> zeroinitializer
; Extracts elements 4-7 of an <8 x i64>, bitcasts them to <8 x i32>, and
; merges with %passthru under all 8 mask bits (masking is per i32 lane).
; The CHECK lines expect the 32-bit-element form vextracti32x8 $1 writing
; ymm1 under {%k1}, then a copy of ymm1 into ymm0.
1046 define <8 x i32> @mask_cast_extract_v8i64_v8i32_1(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) {
1047 ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1:
1049 ; CHECK-NEXT: kmovd %edi, %k1
1050 ; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm1 {%k1}
1051 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1053 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1054 %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32>
1055 %mask.cast = bitcast i8 %mask to <8 x i1>
1056 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> %passthru
; Extracts elements 4-7 of an <8 x i64>, bitcasts them to <8 x i32>, and
; zeroes every i32 lane whose bit in the 8-bit mask is clear.
; The CHECK lines expect the 32-bit-element form vextracti32x8 $1 with
; {%k1} {z}.
1060 define <8 x i32> @mask_cast_extract_v8i64_v8i32_1_z(<8 x i64> %a, i8 %mask) {
1061 ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1_z:
1063 ; CHECK-NEXT: kmovd %edi, %k1
1064 ; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z}
1066 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1067 %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32>
1068 %mask.cast = bitcast i8 %mask to <8 x i1>
1069 %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> zeroinitializer
; Extracts elements 0-3 of an <8 x double>, bitcasts them to <8 x float>, and
; merges with %passthru under all 8 mask bits (masking is per float lane).
; The CHECK lines expect no extract instruction, just a masked vblendmps on
; ymm registers.
1073 define <8 x float> @mask_cast_extract_v8f64_v8f32_0(<8 x double> %a, <8 x float> %passthru, i8 %mask) {
1074 ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0:
1076 ; CHECK-NEXT: kmovd %edi, %k1
1077 ; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
1079 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1080 %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float>
1081 %mask.cast = bitcast i8 %mask to <8 x i1>
1082 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> %passthru
1083 ret <8 x float> %res
; Extracts elements 0-3 of an <8 x double>, bitcasts them to <8 x float>, and
; zeroes every float lane whose bit in the 8-bit mask is clear.
; The CHECK lines expect a zero-masked vmovaps on ymm0.
1086 define <8 x float> @mask_cast_extract_v8f64_v8f32_0_z(<8 x double> %a, i8 %mask) {
1087 ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0_z:
1089 ; CHECK-NEXT: kmovd %edi, %k1
1090 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
1092 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1093 %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float>
1094 %mask.cast = bitcast i8 %mask to <8 x i1>
1095 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> zeroinitializer
1096 ret <8 x float> %res
; Extracts elements 4-7 of an <8 x double>, bitcasts them to <8 x float>, and
; merges with %passthru under all 8 mask bits (masking is per float lane).
; The CHECK lines expect the 32-bit-element form vextractf32x8 $1 writing
; ymm1 under {%k1}, then a copy of ymm1 into ymm0.
1099 define <8 x float> @mask_cast_extract_v8f64_v8f32_1(<8 x double> %a, <8 x float> %passthru, i8 %mask) {
1100 ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1:
1102 ; CHECK-NEXT: kmovd %edi, %k1
1103 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1}
1104 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1106 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1107 %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float>
1108 %mask.cast = bitcast i8 %mask to <8 x i1>
1109 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> %passthru
1110 ret <8 x float> %res
; Extracts elements 4-7 of an <8 x double>, bitcasts them to <8 x float>, and
; zeroes every float lane whose bit in the 8-bit mask is clear.
; The CHECK lines expect the 32-bit-element form vextractf32x8 $1 with
; {%k1} {z}.
1113 define <8 x float> @mask_cast_extract_v8f64_v8f32_1_z(<8 x double> %a, i8 %mask) {
1114 ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1_z:
1116 ; CHECK-NEXT: kmovd %edi, %k1
1117 ; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
1119 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1120 %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float>
1121 %mask.cast = bitcast i8 %mask to <8 x i1>
1122 %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> zeroinitializer
1123 ret <8 x float> %res
; Extracts elements 0-1 of an <8 x i64>, bitcasts them to <4 x i32>, and
; merges with %passthru under mask bits 0-3 (%mask.extract), per i32 lane.
; The CHECK lines expect no extract instruction, just a masked vpblendmd on
; xmm registers followed by vzeroupper.
1126 define <4 x i32> @mask_cast_extract_v8i64_v4i32_0(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) {
1127 ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0:
1129 ; CHECK-NEXT: kmovd %edi, %k1
1130 ; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
1131 ; CHECK-NEXT: vzeroupper
1133 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
1134 %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32>
1135 %mask.cast = bitcast i8 %mask to <8 x i1>
1136 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1137 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> %passthru
; Extracts elements 0-1 of an <8 x i64>, bitcasts them to <4 x i32>, and
; zeroes every i32 lane whose bit among mask bits 0-3 is clear.
; The CHECK lines expect a zero-masked vmovdqa32 on xmm0 and vzeroupper.
1141 define <4 x i32> @mask_cast_extract_v8i64_v4i32_0_z(<8 x i64> %a, i8 %mask) {
1142 ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0_z:
1144 ; CHECK-NEXT: kmovd %edi, %k1
1145 ; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1146 ; CHECK-NEXT: vzeroupper
1148 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
1149 %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32>
1150 %mask.cast = bitcast i8 %mask to <8 x i1>
1151 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1152 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> zeroinitializer
; Extracts elements 2-3 of an <8 x i64>, bitcasts them to <4 x i32>, and
; merges with %passthru under mask bits 0-3 (%mask.extract), per i32 lane.
; The CHECK lines expect the 32-bit-element form vextracti32x4 $1 writing
; xmm1 under {%k1}, a copy of xmm1 into xmm0, and vzeroupper.
1156 define <4 x i32> @mask_cast_extract_v8i64_v4i32_1(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) {
1157 ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1:
1159 ; CHECK-NEXT: kmovd %edi, %k1
1160 ; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm1 {%k1}
1161 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
1162 ; CHECK-NEXT: vzeroupper
1164 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
1165 %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32>
1166 %mask.cast = bitcast i8 %mask to <8 x i1>
1167 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1168 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> %passthru
; Extracts elements 2-3 of an <8 x i64>, bitcasts them to <4 x i32>, and
; zeroes every i32 lane whose bit among mask bits 0-3 is clear.
; The CHECK lines expect the 32-bit-element form vextracti32x4 $1 with
; {%k1} {z}, followed by vzeroupper.
1172 define <4 x i32> @mask_cast_extract_v8i64_v4i32_1_z(<8 x i64> %a, i8 %mask) {
1173 ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1_z:
1175 ; CHECK-NEXT: kmovd %edi, %k1
1176 ; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z}
1177 ; CHECK-NEXT: vzeroupper
1179 %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
1180 %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32>
1181 %mask.cast = bitcast i8 %mask to <8 x i1>
1182 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1183 %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> zeroinitializer
; Extracts elements 0-1 of an <8 x double>, bitcasts them to <4 x float>, and
; merges with %passthru under mask bits 0-3 (%mask.extract), per float lane.
; The CHECK lines expect no extract instruction, just a masked vblendmps on
; xmm registers followed by vzeroupper.
1187 define <4 x float> @mask_cast_extract_v8f64_v4f32_0(<8 x double> %a, <4 x float> %passthru, i8 %mask) {
1188 ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0:
1190 ; CHECK-NEXT: kmovd %edi, %k1
1191 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
1192 ; CHECK-NEXT: vzeroupper
1194 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1195 %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
1196 %mask.cast = bitcast i8 %mask to <8 x i1>
1197 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1198 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> %passthru
1199 ret <4 x float> %res
; Extracts elements 0-1 of an <8 x double>, bitcasts them to <4 x float>, and
; zeroes every float lane whose bit among mask bits 0-3 is clear.
; The CHECK lines expect a zero-masked vmovaps on xmm0 and vzeroupper.
1202 define <4 x float> @mask_cast_extract_v8f64_v4f32_0_z(<8 x double> %a, i8 %mask) {
1203 ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0_z:
1205 ; CHECK-NEXT: kmovd %edi, %k1
1206 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
1207 ; CHECK-NEXT: vzeroupper
1209 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1210 %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
1211 %mask.cast = bitcast i8 %mask to <8 x i1>
1212 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1213 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> zeroinitializer
1214 ret <4 x float> %res
; Extracts elements 2-3 of an <8 x double>, bitcasts them to <4 x float>, and
; merges with %passthru under mask bits 0-3 (%mask.extract), per float lane.
; The CHECK lines expect the 32-bit-element form vextractf32x4 $1 writing
; xmm1 under {%k1}, a copy of xmm1 into xmm0, and vzeroupper.
1217 define <4 x float> @mask_cast_extract_v8f64_v4f32_1(<8 x double> %a, <4 x float> %passthru, i8 %mask) {
1218 ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1:
1220 ; CHECK-NEXT: kmovd %edi, %k1
1221 ; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm1 {%k1}
1222 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
1223 ; CHECK-NEXT: vzeroupper
1225 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1226 %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
1227 %mask.cast = bitcast i8 %mask to <8 x i1>
1228 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1229 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> %passthru
1230 ret <4 x float> %res
; Extracts elements 2-3 of an <8 x double>, bitcasts them to <4 x float>, and
; zeroes every float lane whose bit among mask bits 0-3 is clear.
; The CHECK lines expect the 32-bit-element form vextractf32x4 $1 with
; {%k1} {z}, followed by vzeroupper.
1233 define <4 x float> @mask_cast_extract_v8f64_v4f32_1_z(<8 x double> %a, i8 %mask) {
1234 ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1_z:
1236 ; CHECK-NEXT: kmovd %edi, %k1
1237 ; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
1238 ; CHECK-NEXT: vzeroupper
1240 %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1241 %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
1242 %mask.cast = bitcast i8 %mask to <8 x i1>
1243 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1244 %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> zeroinitializer
1245 ret <4 x float> %res
; Extracts elements 0-7 of a <16 x i32>, bitcasts them to <4 x i64>, and
; merges with %passthru under mask bits 0-3 (%mask.extract), per i64 lane.
; The CHECK lines expect no extract instruction, just a qword-granular
; masked vpblendmq on ymm registers.
1248 define <4 x i64> @mask_cast_extract_v16i32_v4i64_0(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) {
1249 ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_0:
1251 ; CHECK-NEXT: kmovd %edi, %k1
1252 ; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
1254 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1255 %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
1256 %mask.cast = bitcast i8 %mask to <8 x i1>
1257 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1258 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> %passthru
; Extracts elements 0-7 of a <16 x i32>, bitcasts them to <4 x i64>, and
; zeroes every i64 lane whose bit among mask bits 0-3 is clear.
; The CHECK lines expect a zero-masked vmovdqa64 on ymm0.
1262 define <4 x i64> @mask_cast_extract_v16i32_v4i64_0_z(<16 x i32> %a, i8 %mask) {
1263 ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_0_z:
1265 ; CHECK-NEXT: kmovd %edi, %k1
1266 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
1268 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1269 %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
1270 %mask.cast = bitcast i8 %mask to <8 x i1>
1271 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1272 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> zeroinitializer
; Extracts elements 8-15 of a <16 x i32>, bitcasts them to <4 x i64>, and
; merges with %passthru under mask bits 0-3 (%mask.extract), per i64 lane.
; The CHECK lines expect the 64-bit-element form vextracti64x4 $1 writing
; ymm1 under {%k1}, then a copy of ymm1 into ymm0.
1276 define <4 x i64> @mask_cast_extract_v16i32_v4i64_1(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) {
1277 ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1:
1279 ; CHECK-NEXT: kmovd %edi, %k1
1280 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm1 {%k1}
1281 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1283 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1284 %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
1285 %mask.cast = bitcast i8 %mask to <8 x i1>
1286 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1287 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> %passthru
; Extracts elements 8-15 of a <16 x i32>, bitcasts them to <4 x i64>, and
; zeroes every i64 lane whose bit among mask bits 0-3 is clear.
; The CHECK lines expect the 64-bit-element form vextracti64x4 $1 with
; {%k1} {z}.
1291 define <4 x i64> @mask_cast_extract_v16i32_v4i64_1_z(<16 x i32> %a, i8 %mask) {
1292 ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1_z:
1294 ; CHECK-NEXT: kmovd %edi, %k1
1295 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z}
1297 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1298 %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
1299 %mask.cast = bitcast i8 %mask to <8 x i1>
1300 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1301 %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> zeroinitializer
; Extracts elements 0-7 of a <16 x float>, bitcasts them to <4 x double>, and
; merges with %passthru under mask bits 0-3 (%mask.extract), per double lane.
; The CHECK lines expect no extract instruction, just a masked vblendmpd on
; ymm registers.
1305 define <4 x double> @mask_cast_extract_v16f32_v4f64_0(<16 x float> %a, <4 x double> %passthru, i8 %mask) {
1306 ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0:
1308 ; CHECK-NEXT: kmovd %edi, %k1
1309 ; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
1311 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1312 %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
1313 %mask.cast = bitcast i8 %mask to <8 x i1>
1314 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1315 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> %passthru
1316 ret <4 x double> %res
; Extracts elements 0-7 of a <16 x float>, bitcasts them to <4 x double>, and
; zeroes every double lane whose bit among mask bits 0-3 is clear.
; The CHECK lines expect a zero-masked vmovapd on ymm0.
1319 define <4 x double> @mask_cast_extract_v16f32_v4f64_0_z(<16 x float> %a, i8 %mask) {
1320 ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0_z:
1322 ; CHECK-NEXT: kmovd %edi, %k1
1323 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z}
1325 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1326 %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
1327 %mask.cast = bitcast i8 %mask to <8 x i1>
1328 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1329 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> zeroinitializer
1330 ret <4 x double> %res
; Extracts elements 8-15 of a <16 x float>, bitcasts them to <4 x double>, and
; merges with %passthru under mask bits 0-3 (%mask.extract), per double lane.
; The CHECK lines expect the 64-bit-element form vextractf64x4 $1 writing
; ymm1 under {%k1}, then a copy of ymm1 into ymm0.
1333 define <4 x double> @mask_cast_extract_v16f32_v4f64_1(<16 x float> %a, <4 x double> %passthru, i8 %mask) {
1334 ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1:
1336 ; CHECK-NEXT: kmovd %edi, %k1
1337 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 {%k1}
1338 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1340 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1341 %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
1342 %mask.cast = bitcast i8 %mask to <8 x i1>
1343 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1344 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> %passthru
1345 ret <4 x double> %res
; Extracts elements 8-15 of a <16 x float>, bitcasts them to <4 x double>, and
; zeroes every double lane whose bit among mask bits 0-3 is clear.
; The CHECK lines expect the 64-bit-element form vextractf64x4 $1 with
; {%k1} {z}.
1348 define <4 x double> @mask_cast_extract_v16f32_v4f64_1_z(<16 x float> %a, i8 %mask) {
1349 ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1_z:
1351 ; CHECK-NEXT: kmovd %edi, %k1
1352 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
1354 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1355 %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
1356 %mask.cast = bitcast i8 %mask to <8 x i1>
1357 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1358 %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> zeroinitializer
1359 ret <4 x double> %res
; Extracts elements 0-3 of a <16 x i32>, bitcasts them to <2 x i64>, and
; merges with %passthru under mask bits 0-1 (%mask.extract), per i64 lane.
; The CHECK lines expect no extract instruction, just a masked vpblendmq on
; xmm registers followed by vzeroupper.
1362 define <2 x i64> @mask_cast_extract_v16i32_v2i64_0(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) {
1363 ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_0:
1365 ; CHECK-NEXT: kmovd %edi, %k1
1366 ; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
1367 ; CHECK-NEXT: vzeroupper
1369 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1370 %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
1371 %mask.cast = bitcast i8 %mask to <8 x i1>
1372 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1373 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> %passthru
; Extracts elements 0-3 of a <16 x i32>, bitcasts them to <2 x i64>, and
; zeroes every i64 lane whose bit among mask bits 0-1 is clear.
; The CHECK lines expect a zero-masked vmovdqa64 on xmm0 and vzeroupper.
1377 define <2 x i64> @mask_cast_extract_v16i32_v2i64_0_z(<16 x i32> %a, i8 %mask) {
1378 ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_0_z:
1380 ; CHECK-NEXT: kmovd %edi, %k1
1381 ; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1382 ; CHECK-NEXT: vzeroupper
1384 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1385 %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
1386 %mask.cast = bitcast i8 %mask to <8 x i1>
1387 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1388 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> zeroinitializer
; Extracts elements 4-7 of a <16 x i32>, bitcasts them to <2 x i64>, and
; merges with %passthru under mask bits 0-1 (%mask.extract), per i64 lane.
; The CHECK lines expect the 64-bit-element form vextracti64x2 $1 writing
; xmm1 under {%k1}, a copy of xmm1 into xmm0, and vzeroupper.
1392 define <2 x i64> @mask_cast_extract_v16i32_v2i64_1(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) {
1393 ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1:
1395 ; CHECK-NEXT: kmovd %edi, %k1
1396 ; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm1 {%k1}
1397 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
1398 ; CHECK-NEXT: vzeroupper
1400 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1401 %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
1402 %mask.cast = bitcast i8 %mask to <8 x i1>
1403 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1404 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> %passthru
; Extracts elements 4-7 of a <16 x i32>, bitcasts them to <2 x i64>, and
; zeroes every i64 lane whose bit among mask bits 0-1 is clear.
; The CHECK lines expect the 64-bit-element form vextracti64x2 $1 with
; {%k1} {z}, followed by vzeroupper.
1408 define <2 x i64> @mask_cast_extract_v16i32_v2i64_1_z(<16 x i32> %a, i8 %mask) {
1409 ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1_z:
1411 ; CHECK-NEXT: kmovd %edi, %k1
1412 ; CHECK-NEXT: vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z}
1413 ; CHECK-NEXT: vzeroupper
1415 %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1416 %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
1417 %mask.cast = bitcast i8 %mask to <8 x i1>
1418 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1419 %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> zeroinitializer
; Extracts elements 0-3 of a <16 x float>, bitcasts them to <2 x double>, and
; merges with %passthru under mask bits 0-1 (%mask.extract), per double lane.
; The CHECK lines expect no extract instruction, just a masked vblendmpd on
; xmm registers followed by vzeroupper.
1423 define <2 x double> @mask_cast_extract_v16f32_v2f64_0(<16 x float> %a, <2 x double> %passthru, i8 %mask) {
1424 ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0:
1426 ; CHECK-NEXT: kmovd %edi, %k1
1427 ; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
1428 ; CHECK-NEXT: vzeroupper
1430 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1431 %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
1432 %mask.cast = bitcast i8 %mask to <8 x i1>
1433 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1434 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru
1435 ret <2 x double> %res
; Extracts elements 0-3 of a <16 x float>, bitcasts them to <2 x double>, and
; zeroes every double lane whose bit among mask bits 0-1 is clear.
; The CHECK lines expect a zero-masked vmovapd on xmm0 and vzeroupper.
1438 define <2 x double> @mask_cast_extract_v16f32_v2f64_0_z(<16 x float> %a, i8 %mask) {
1439 ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0_z:
1441 ; CHECK-NEXT: kmovd %edi, %k1
1442 ; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z}
1443 ; CHECK-NEXT: vzeroupper
1445 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1446 %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
1447 %mask.cast = bitcast i8 %mask to <8 x i1>
1448 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1449 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> zeroinitializer
1450 ret <2 x double> %res
; Extracts elements 4-7 of a <16 x float>, bitcasts them to <2 x double>, and
; merges with %passthru under mask bits 0-1 (%mask.extract), per double lane.
; The CHECK lines expect the 64-bit-element form vextractf64x2 $1 writing
; xmm1 under {%k1}, a copy of xmm1 into xmm0, and vzeroupper.
1453 define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x double> %passthru, i8 %mask) {
1454 ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1:
1456 ; CHECK-NEXT: kmovd %edi, %k1
1457 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1}
1458 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
1459 ; CHECK-NEXT: vzeroupper
1461 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1462 %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
1463 %mask.cast = bitcast i8 %mask to <8 x i1>
1464 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1465 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru
1466 ret <2 x double> %res
; Zero-masked extract of the second 128-bit chunk: elements 4-7 of the
; <16 x float> input are reinterpreted as <2 x double> and selected against
; zeroinitializer using lanes 0-1 of the <8 x i1> view of %mask.
; The expected assembly is a single zero-masked vextractf64x2 $1.
1469 define <2 x double> @mask_cast_extract_v16f32_v2f64_1_z(<16 x float> %a, i8 %mask) {
1470 ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1_z:
1472 ; CHECK-NEXT: kmovd %edi, %k1
1473 ; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z}
1474 ; CHECK-NEXT: vzeroupper
1476 %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1477 %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
1478 %mask.cast = bitcast i8 %mask to <8 x i1>
1479 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1480 %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> zeroinitializer
1481 ret <2 x double> %res
; Merge-masked splat of a scalar loaded from memory: one double is loaded
; (align 1), inserted into both lanes of a <2 x double>, and selected against
; %passthru using lanes 0-1 of the <8 x i1> view of %mask.
; Note: the function name mentions f32, but the IR here operates on double.
; The expected assembly folds the load into a masked vmovddup.
1484 define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) {
1485 ; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask:
1487 ; CHECK-NEXT: kmovd %esi, %k1
1488 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
1490 %q = load double, double* %x, align 1
1491 %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
1492 %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
1493 %mask.cast = bitcast i8 %mask to <8 x i1>
1494 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1495 %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> %passthru
1496 ret <2 x double> %res
; Zero-masked splat of a scalar loaded from memory: one double is loaded
; (align 1), inserted into both lanes of a <2 x double>, and selected against
; zeroinitializer using lanes 0-1 of the <8 x i1> view of %mask.
; Note: the function name mentions f32, but the IR here operates on double.
; The expected assembly folds the load into a zero-masked vmovddup.
1499 define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) {
1500 ; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz:
1502 ; CHECK-NEXT: kmovd %esi, %k1
1503 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
1505 %q = load double, double* %x, align 1
1506 %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
1507 %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
1508 %mask.cast = bitcast i8 %mask to <8 x i1>
1509 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1510 %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer
1511 ret <2 x double> %res
; Merge-masked 128-bit subvector broadcast where the mask granularity differs
; from the shuffle element type: a loaded <2 x double> is repeated twice to
; <4 x double>, bitcast to <8 x float>, and selected per float lane against
; %passthru with the 8-bit mask.
; The expected assembly is a masked vbroadcastf32x4 from memory into ymm0.
1514 define <8 x float> @test_broadcast_2f64_8f32_mask(<2 x double> *%p, i8 %mask, <8 x float> %passthru) nounwind {
1515 ; CHECK-LABEL: test_broadcast_2f64_8f32_mask:
1517 ; CHECK-NEXT: kmovd %esi, %k1
1518 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1520 %1 = load <2 x double>, <2 x double> *%p
1521 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1522 %3 = bitcast <4 x double> %2 to <8 x float>
1523 %mask.cast = bitcast i8 %mask to <8 x i1>
1524 %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> %passthru
1525 ret <8 x float> %res
; Zero-masked 128-bit subvector broadcast: a loaded <2 x double> is repeated
; twice to <4 x double>, bitcast to <8 x float>, and selected per float lane
; against zeroinitializer with the 8-bit mask.
; The expected assembly is a zero-masked vbroadcastf32x4 from memory into ymm0.
1528 define <8 x float> @test_broadcast_2f64_8f32_maskz(<2 x double> *%p, i8 %mask) nounwind {
1529 ; CHECK-LABEL: test_broadcast_2f64_8f32_maskz:
1531 ; CHECK-NEXT: kmovd %esi, %k1
1532 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1534 %1 = load <2 x double>, <2 x double> *%p
1535 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1536 %3 = bitcast <4 x double> %2 to <8 x float>
1537 %mask.cast = bitcast i8 %mask to <8 x i1>
1538 %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> zeroinitializer
1539 ret <8 x float> %res
; Integer counterpart of the 2f64 -> 8f32 merge-masked broadcast: a loaded
; <2 x i64> is repeated twice to <4 x i64>, bitcast to <8 x i32>, and selected
; per i32 lane against %passthru with the 8-bit mask.
; The expected assembly is a masked vbroadcasti32x4 from memory into ymm0.
1542 define <8 x i32> @test_broadcast_2i64_8i32_mask(<2 x i64> *%p, i8 %mask, <8 x i32> %passthru) nounwind {
1543 ; CHECK-LABEL: test_broadcast_2i64_8i32_mask:
1545 ; CHECK-NEXT: kmovd %esi, %k1
1546 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1548 %1 = load <2 x i64>, <2 x i64> *%p
1549 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1550 %3 = bitcast <4 x i64> %2 to <8 x i32>
1551 %mask.cast = bitcast i8 %mask to <8 x i1>
1552 %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> %passthru
; Integer counterpart of the 2f64 -> 8f32 zero-masked broadcast: a loaded
; <2 x i64> is repeated twice to <4 x i64>, bitcast to <8 x i32>, and selected
; per i32 lane against zeroinitializer with the 8-bit mask.
; The expected assembly is a zero-masked vbroadcasti32x4 from memory into ymm0.
1556 define <8 x i32> @test_broadcast_2i64_8i32_maskz(<2 x i64> *%p, i8 %mask) nounwind {
1557 ; CHECK-LABEL: test_broadcast_2i64_8i32_maskz:
1559 ; CHECK-NEXT: kmovd %esi, %k1
1560 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1562 %1 = load <2 x i64>, <2 x i64> *%p
1563 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1564 %3 = bitcast <4 x i64> %2 to <8 x i32>
1565 %mask.cast = bitcast i8 %mask to <8 x i1>
1566 %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> zeroinitializer
; Merge-masked 128-bit subvector broadcast to 512 bits: a loaded <2 x double>
; is repeated four times to <8 x double>, bitcast to <16 x float>, and selected
; per float lane against %passthru with the 16-bit mask.
; The expected assembly is a masked vbroadcastf32x4 from memory into zmm0.
1570 define <16 x float> @test_broadcast_2f64_16f32_mask(<2 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
1571 ; CHECK-LABEL: test_broadcast_2f64_16f32_mask:
1573 ; CHECK-NEXT: kmovd %esi, %k1
1574 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1576 %1 = load <2 x double>, <2 x double> *%p
1577 %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1578 %3 = bitcast <8 x double> %2 to <16 x float>
1579 %mask.cast = bitcast i16 %mask to <16 x i1>
1580 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
1581 ret <16 x float> %res
; Zero-masked 128-bit subvector broadcast to 512 bits: a loaded <2 x double>
; is repeated four times to <8 x double>, bitcast to <16 x float>, and selected
; per float lane against zeroinitializer with the 16-bit mask.
; The expected assembly is a zero-masked vbroadcastf32x4 from memory into zmm0.
1584 define <16 x float> @test_broadcast_2f64_16f32_maskz(<2 x double> *%p, i16 %mask) nounwind {
1585 ; CHECK-LABEL: test_broadcast_2f64_16f32_maskz:
1587 ; CHECK-NEXT: kmovd %esi, %k1
1588 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1590 %1 = load <2 x double>, <2 x double> *%p
1591 %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1592 %3 = bitcast <8 x double> %2 to <16 x float>
1593 %mask.cast = bitcast i16 %mask to <16 x i1>
1594 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer
1595 ret <16 x float> %res
; Integer counterpart of the 2f64 -> 16f32 merge-masked broadcast: a loaded
; <2 x i64> is repeated four times to <8 x i64>, bitcast to <16 x i32>, and
; selected per i32 lane against %passthru with the 16-bit mask.
; The expected assembly is a masked vbroadcasti32x4 from memory into zmm0.
1598 define <16 x i32> @test_broadcast_2i64_16i32_mask(<2 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
1599 ; CHECK-LABEL: test_broadcast_2i64_16i32_mask:
1601 ; CHECK-NEXT: kmovd %esi, %k1
1602 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1604 %1 = load <2 x i64>, <2 x i64> *%p
1605 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1606 %3 = bitcast <8 x i64> %2 to <16 x i32>
1607 %mask.cast = bitcast i16 %mask to <16 x i1>
1608 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
; Integer counterpart of the 2f64 -> 16f32 zero-masked broadcast: a loaded
; <2 x i64> is repeated four times to <8 x i64>, bitcast to <16 x i32>, and
; selected per i32 lane against zeroinitializer with the 16-bit mask.
; The expected assembly is a zero-masked vbroadcasti32x4 from memory into zmm0.
1612 define <16 x i32> @test_broadcast_2i64_16i32_maskz(<2 x i64> *%p, i16 %mask) nounwind {
1613 ; CHECK-LABEL: test_broadcast_2i64_16i32_maskz:
1615 ; CHECK-NEXT: kmovd %esi, %k1
1616 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1618 %1 = load <2 x i64>, <2 x i64> *%p
1619 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1620 %3 = bitcast <8 x i64> %2 to <16 x i32>
1621 %mask.cast = bitcast i16 %mask to <16 x i1>
1622 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer
; Merge-masked 256-bit subvector broadcast: a loaded <4 x double> is repeated
; twice to <8 x double>, bitcast to <16 x float>, and selected per float lane
; against %passthru with the 16-bit mask.
; The expected assembly is a masked vbroadcastf32x8 from memory into zmm0.
1626 define <16 x float> @test_broadcast_4f64_16f32_mask(<4 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
1627 ; CHECK-LABEL: test_broadcast_4f64_16f32_mask:
1629 ; CHECK-NEXT: kmovd %esi, %k1
1630 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1632 %1 = load <4 x double>, <4 x double> *%p
1633 %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1634 %3 = bitcast <8 x double> %2 to <16 x float>
1635 %mask.cast = bitcast i16 %mask to <16 x i1>
1636 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
1637 ret <16 x float> %res
; Zero-masked 256-bit subvector broadcast: a loaded <4 x double> is repeated
; twice to <8 x double>, bitcast to <16 x float>, and selected per float lane
; against zeroinitializer with the 16-bit mask.
; The expected assembly is a zero-masked vbroadcastf32x8 from memory into zmm0.
1640 define <16 x float> @test_broadcast_4f64_16f32_maskz(<4 x double> *%p, i16 %mask) nounwind {
1641 ; CHECK-LABEL: test_broadcast_4f64_16f32_maskz:
1643 ; CHECK-NEXT: kmovd %esi, %k1
1644 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1646 %1 = load <4 x double>, <4 x double> *%p
1647 %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1648 %3 = bitcast <8 x double> %2 to <16 x float>
1649 %mask.cast = bitcast i16 %mask to <16 x i1>
1650 %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer
1651 ret <16 x float> %res
; Integer counterpart of the 4f64 -> 16f32 merge-masked broadcast: a loaded
; <4 x i64> is repeated twice to <8 x i64>, bitcast to <16 x i32>, and selected
; per i32 lane against %passthru with the 16-bit mask.
; The expected assembly is a masked vbroadcasti32x8 from memory into zmm0.
1654 define <16 x i32> @test_broadcast_4i64_16i32_mask(<4 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
1655 ; CHECK-LABEL: test_broadcast_4i64_16i32_mask:
1657 ; CHECK-NEXT: kmovd %esi, %k1
1658 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1660 %1 = load <4 x i64>, <4 x i64> *%p
1661 %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1662 %3 = bitcast <8 x i64> %2 to <16 x i32>
1663 %mask.cast = bitcast i16 %mask to <16 x i1>
1664 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
; Integer counterpart of the 4f64 -> 16f32 zero-masked broadcast: a loaded
; <4 x i64> is repeated twice to <8 x i64>, bitcast to <16 x i32>, and selected
; per i32 lane against zeroinitializer with the 16-bit mask.
; The expected assembly is a zero-masked vbroadcasti32x8 from memory into zmm0.
1668 define <16 x i32> @test_broadcast_4i64_16i32_maskz(<4 x i64> *%p, i16 %mask) nounwind {
1669 ; CHECK-LABEL: test_broadcast_4i64_16i32_maskz:
1671 ; CHECK-NEXT: kmovd %esi, %k1
1672 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1674 %1 = load <4 x i64>, <4 x i64> *%p
1675 %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1676 %3 = bitcast <8 x i64> %2 to <16 x i32>
1677 %mask.cast = bitcast i16 %mask to <16 x i1>
1678 %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer
; Merge-masked 128-bit subvector broadcast masked at 64-bit granularity: a
; loaded <4 x float> is repeated twice to <8 x float>, bitcast to <4 x double>,
; and selected against %passthru using lanes 0-3 of the <8 x i1> view of %mask.
; The expected assembly is a masked vbroadcastf64x2 from memory into ymm0.
1682 define <4 x double> @test_broadcast_4f32_4f64_mask(<4 x float> *%p, i8 %mask, <4 x double> %passthru) nounwind {
1683 ; CHECK-LABEL: test_broadcast_4f32_4f64_mask:
1685 ; CHECK-NEXT: kmovd %esi, %k1
1686 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
1688 %1 = load <4 x float>, <4 x float> *%p
1689 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1690 %3 = bitcast <8 x float> %2 to <4 x double>
1691 %mask.cast = bitcast i8 %mask to <8 x i1>
1692 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1693 %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> %passthru
1694 ret <4 x double> %res
; Zero-masked 128-bit subvector broadcast masked at 64-bit granularity: a
; loaded <4 x float> is repeated twice to <8 x float>, bitcast to <4 x double>,
; and selected against zeroinitializer using lanes 0-3 of the <8 x i1> view of
; %mask.
; The expected assembly is a zero-masked vbroadcastf64x2 from memory into ymm0.
1697 define <4 x double> @test_broadcast_4f32_4f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
1698 ; CHECK-LABEL: test_broadcast_4f32_4f64_maskz:
1700 ; CHECK-NEXT: kmovd %esi, %k1
1701 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
1703 %1 = load <4 x float>, <4 x float> *%p
1704 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1705 %3 = bitcast <8 x float> %2 to <4 x double>
1706 %mask.cast = bitcast i8 %mask to <8 x i1>
1707 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1708 %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> zeroinitializer
1709 ret <4 x double> %res
; Integer counterpart of the 4f32 -> 4f64 merge-masked broadcast: a loaded
; <4 x i32> is repeated twice to <8 x i32>, bitcast to <4 x i64>, and selected
; against %passthru using lanes 0-3 of the <8 x i1> view of %mask.
; The expected assembly is a masked vbroadcasti64x2 from memory into ymm0.
1712 define <4 x i64> @test_broadcast_4i32_4i64_mask(<4 x i32> *%p, i8 %mask, <4 x i64> %passthru) nounwind {
1713 ; CHECK-LABEL: test_broadcast_4i32_4i64_mask:
1715 ; CHECK-NEXT: kmovd %esi, %k1
1716 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
1718 %1 = load <4 x i32>, <4 x i32> *%p
1719 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1720 %3 = bitcast <8 x i32> %2 to <4 x i64>
1721 %mask.cast = bitcast i8 %mask to <8 x i1>
1722 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1723 %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> %passthru
; Integer counterpart of the 4f32 -> 4f64 zero-masked broadcast: a loaded
; <4 x i32> is repeated twice to <8 x i32>, bitcast to <4 x i64>, and selected
; against zeroinitializer using lanes 0-3 of the <8 x i1> view of %mask.
; The expected assembly is a zero-masked vbroadcasti64x2 from memory into ymm0.
1727 define <4 x i64> @test_broadcast_4i32_4i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
1728 ; CHECK-LABEL: test_broadcast_4i32_4i64_maskz:
1730 ; CHECK-NEXT: kmovd %esi, %k1
1731 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
1733 %1 = load <4 x i32>, <4 x i32> *%p
1734 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1735 %3 = bitcast <8 x i32> %2 to <4 x i64>
1736 %mask.cast = bitcast i8 %mask to <8 x i1>
1737 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1738 %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> zeroinitializer
; Merge-masked 128-bit subvector broadcast to 512 bits, masked at 64-bit
; granularity: a loaded <4 x float> is repeated four times to <16 x float>,
; bitcast to <8 x double>, and selected against %passthru with the 8-bit mask.
; The expected assembly is a masked vbroadcastf64x2 from memory into zmm0.
1742 define <8 x double> @test_broadcast_4f32_8f64_mask(<4 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
1743 ; CHECK-LABEL: test_broadcast_4f32_8f64_mask:
1745 ; CHECK-NEXT: kmovd %esi, %k1
1746 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1748 %1 = load <4 x float>, <4 x float> *%p
1749 %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1750 %3 = bitcast <16 x float> %2 to <8 x double>
1751 %mask.cast = bitcast i8 %mask to <8 x i1>
1752 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
1753 ret <8 x double> %res
; Zero-masked 128-bit subvector broadcast to 512 bits, masked at 64-bit
; granularity: a loaded <4 x float> is repeated four times to <16 x float>,
; bitcast to <8 x double>, and selected against zeroinitializer with the 8-bit
; mask.
; The expected assembly is a zero-masked vbroadcastf64x2 from memory into zmm0.
1756 define <8 x double> @test_broadcast_4f32_8f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
1757 ; CHECK-LABEL: test_broadcast_4f32_8f64_maskz:
1759 ; CHECK-NEXT: kmovd %esi, %k1
1760 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1762 %1 = load <4 x float>, <4 x float> *%p
1763 %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1764 %3 = bitcast <16 x float> %2 to <8 x double>
1765 %mask.cast = bitcast i8 %mask to <8 x i1>
1766 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer
1767 ret <8 x double> %res
; Integer counterpart of the 4f32 -> 8f64 merge-masked broadcast: a loaded
; <4 x i32> is repeated four times to <16 x i32>, bitcast to <8 x i64>, and
; selected against %passthru with the 8-bit mask.
; The expected assembly is a masked vbroadcasti64x2 from memory into zmm0.
1770 define <8 x i64> @test_broadcast_4i32_8i64_mask(<4 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
1771 ; CHECK-LABEL: test_broadcast_4i32_8i64_mask:
1773 ; CHECK-NEXT: kmovd %esi, %k1
1774 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1776 %1 = load <4 x i32>, <4 x i32> *%p
1777 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1778 %3 = bitcast <16 x i32> %2 to <8 x i64>
1779 %mask.cast = bitcast i8 %mask to <8 x i1>
1780 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
; Integer counterpart of the 4f32 -> 8f64 zero-masked broadcast: a loaded
; <4 x i32> is repeated four times to <16 x i32>, bitcast to <8 x i64>, and
; selected against zeroinitializer with the 8-bit mask.
; The expected assembly is a zero-masked vbroadcasti64x2 from memory into zmm0.
1784 define <8 x i64> @test_broadcast_4i32_8i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
1785 ; CHECK-LABEL: test_broadcast_4i32_8i64_maskz:
1787 ; CHECK-NEXT: kmovd %esi, %k1
1788 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1790 %1 = load <4 x i32>, <4 x i32> *%p
1791 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1792 %3 = bitcast <16 x i32> %2 to <8 x i64>
1793 %mask.cast = bitcast i8 %mask to <8 x i1>
1794 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer
; Merge-masked 256-bit subvector broadcast masked at 64-bit granularity: a
; loaded <8 x float> is repeated twice to <16 x float>, bitcast to
; <8 x double>, and selected against %passthru with the 8-bit mask.
; The expected assembly is a masked vbroadcastf64x4 from memory into zmm0.
1798 define <8 x double> @test_broadcast_8f32_8f64_mask(<8 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
1799 ; CHECK-LABEL: test_broadcast_8f32_8f64_mask:
1801 ; CHECK-NEXT: kmovd %esi, %k1
1802 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1804 %1 = load <8 x float>, <8 x float> *%p
1805 %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1806 %3 = bitcast <16 x float> %2 to <8 x double>
1807 %mask.cast = bitcast i8 %mask to <8 x i1>
1808 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
1809 ret <8 x double> %res
; Zero-masked 256-bit subvector broadcast masked at 64-bit granularity: a
; loaded <8 x float> is repeated twice to <16 x float>, bitcast to
; <8 x double>, and selected against zeroinitializer with the 8-bit mask.
; The expected assembly is a zero-masked vbroadcastf64x4 from memory into zmm0.
1812 define <8 x double> @test_broadcast_8f32_8f64_maskz(<8 x float> *%p, i8 %mask) nounwind {
1813 ; CHECK-LABEL: test_broadcast_8f32_8f64_maskz:
1815 ; CHECK-NEXT: kmovd %esi, %k1
1816 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1818 %1 = load <8 x float>, <8 x float> *%p
1819 %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1820 %3 = bitcast <16 x float> %2 to <8 x double>
1821 %mask.cast = bitcast i8 %mask to <8 x i1>
1822 %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer
1823 ret <8 x double> %res
; Integer counterpart of the 8f32 -> 8f64 merge-masked broadcast: a loaded
; <8 x i32> is repeated twice to <16 x i32>, bitcast to <8 x i64>, and selected
; against %passthru with the 8-bit mask.
; The expected assembly is a masked vbroadcasti64x4 from memory into zmm0.
1826 define <8 x i64> @test_broadcast_8i32_8i64_mask(<8 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
1827 ; CHECK-LABEL: test_broadcast_8i32_8i64_mask:
1829 ; CHECK-NEXT: kmovd %esi, %k1
1830 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1832 %1 = load <8 x i32>, <8 x i32> *%p
1833 %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1834 %3 = bitcast <16 x i32> %2 to <8 x i64>
1835 %mask.cast = bitcast i8 %mask to <8 x i1>
1836 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
; Integer counterpart of the 8f32 -> 8f64 zero-masked broadcast: a loaded
; <8 x i32> is repeated twice to <16 x i32>, bitcast to <8 x i64>, and selected
; against zeroinitializer with the 8-bit mask.
; The expected assembly is a zero-masked vbroadcasti64x4 from memory into zmm0.
1840 define <8 x i64> @test_broadcast_8i32_8i64_maskz(<8 x i32> *%p, i8 %mask) nounwind {
1841 ; CHECK-LABEL: test_broadcast_8i32_8i64_maskz:
1843 ; CHECK-NEXT: kmovd %esi, %k1
1844 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1846 %1 = load <8 x i32>, <8 x i32> *%p
1847 %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1848 %3 = bitcast <16 x i32> %2 to <8 x i64>
1849 %mask.cast = bitcast i8 %mask to <8 x i1>
1850 %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer
; Merge-masked repeat of the low float pair within a 128-bit vector: %vec is
; shuffled with <0,1,0,1> and selected against %passthru using lanes 0-3 of
; the <8 x i1> view of %mask.
; The expected assembly is an unmasked vmovddup followed by a masked vblendmps
; (two instructions rather than a single masked broadcast).
1854 define <4 x float> @test_broadcastf32x2_v4f32(<4 x float> %vec, <4 x float> %passthru, i8 %mask) {
1855 ; CHECK-LABEL: test_broadcastf32x2_v4f32:
1857 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1858 ; CHECK-NEXT: kmovd %edi, %k1
1859 ; CHECK-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
1861 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1862 %mask.cast = bitcast i8 %mask to <8 x i1>
1863 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1864 %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> %passthru
1865 ret <4 x float> %res
; Zero-masked repeat of the low float pair within a 128-bit vector: %vec is
; shuffled with <0,1,0,1> and selected against zeroinitializer using lanes 0-3
; of the <8 x i1> view of %mask.
; The expected assembly is an unmasked vmovddup followed by a zero-masked
; vmovaps (two instructions rather than a single masked broadcast).
1868 define <4 x float> @test_broadcastf32x2_v4f32_z(<4 x float> %vec, i8 %mask) {
1869 ; CHECK-LABEL: test_broadcastf32x2_v4f32_z:
1871 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1872 ; CHECK-NEXT: kmovd %edi, %k1
1873 ; CHECK-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z}
1875 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1876 %mask.cast = bitcast i8 %mask to <8 x i1>
1877 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1878 %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> zeroinitializer
1879 ret <4 x float> %res
; Merge-masked repeat of the low i32 pair within a 128-bit vector: %vec is
; shuffled with <0,1,0,1> and selected against %passthru using lanes 0-3 of
; the <8 x i1> view of %mask.
; The expected assembly is a masked vbroadcasti32x2 into xmm1 followed by a
; copy of xmm1 into xmm0.
1882 define <4 x i32> @test_broadcasti32x2_v4i32(<4 x i32> %vec, <4 x i32> %passthru, i8 %mask) {
1883 ; CHECK-LABEL: test_broadcasti32x2_v4i32:
1885 ; CHECK-NEXT: kmovd %edi, %k1
1886 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
1887 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
1889 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1890 %mask.cast = bitcast i8 %mask to <8 x i1>
1891 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1892 %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> %passthru
; Zero-masked repeat of the low i32 pair within a 128-bit vector: %vec is
; shuffled with <0,1,0,1> and selected against zeroinitializer using lanes 0-3
; of the <8 x i1> view of %mask.
; The expected assembly is a single zero-masked vbroadcasti32x2 on xmm0.
1896 define <4 x i32> @test_broadcasti32x2_v4i32_z(<4 x i32> %vec, i8 %mask) {
1897 ; CHECK-LABEL: test_broadcasti32x2_v4i32_z:
1899 ; CHECK-NEXT: kmovd %edi, %k1
1900 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
1902 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1903 %mask.cast = bitcast i8 %mask to <8 x i1>
1904 %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1905 %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked broadcast of the low float pair across a 256-bit vector: %vec
; is shuffled with <0,1> repeated four times and selected against %passthru
; with the 8-bit mask.
; The expected assembly is a masked vbroadcastf32x2 into ymm1 followed by a
; copy of ymm1 into ymm0.
1909 define <8 x float> @test_broadcastf32x2_v8f32(<8 x float> %vec, <8 x float> %passthru, i8 %mask) {
1910 ; CHECK-LABEL: test_broadcastf32x2_v8f32:
1912 ; CHECK-NEXT: kmovd %edi, %k1
1913 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
1914 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1916 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1917 %mask.cast = bitcast i8 %mask to <8 x i1>
1918 %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> %passthru
1919 ret <8 x float> %res
; Zero-masked broadcast of the low float pair across a 256-bit vector: %vec is
; shuffled with <0,1> repeated four times and selected against zeroinitializer
; with the 8-bit mask.
; The expected assembly is a single zero-masked vbroadcastf32x2 into ymm0.
1922 define <8 x float> @test_broadcastf32x2_v8f32_z(<8 x float> %vec, i8 %mask) {
1923 ; CHECK-LABEL: test_broadcastf32x2_v8f32_z:
1925 ; CHECK-NEXT: kmovd %edi, %k1
1926 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
1928 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1929 %mask.cast = bitcast i8 %mask to <8 x i1>
1930 %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> zeroinitializer
1931 ret <8 x float> %res
; Merge-masked broadcast of the low i32 pair across a 256-bit vector: %vec is
; shuffled with <0,1> repeated four times and selected against %passthru with
; the 8-bit mask.
; The expected assembly is a masked vbroadcasti32x2 into ymm1 followed by a
; copy of ymm1 into ymm0.
1934 define <8 x i32> @test_broadcasti32x2_v8i32(<8 x i32> %vec, <8 x i32> %passthru, i8 %mask) {
1935 ; CHECK-LABEL: test_broadcasti32x2_v8i32:
1937 ; CHECK-NEXT: kmovd %edi, %k1
1938 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
1939 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1941 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1942 %mask.cast = bitcast i8 %mask to <8 x i1>
1943 %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> %passthru
; Zero-masked broadcast of the low i32 pair across a 256-bit vector: %vec is
; shuffled with <0,1> repeated four times and selected against zeroinitializer
; with the 8-bit mask.
; The expected assembly is a single zero-masked vbroadcasti32x2 into ymm0.
1947 define <8 x i32> @test_broadcasti32x2_v8i32_z(<8 x i32> %vec, i8 %mask) {
1948 ; CHECK-LABEL: test_broadcasti32x2_v8i32_z:
1950 ; CHECK-NEXT: kmovd %edi, %k1
1951 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
1953 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1954 %mask.cast = bitcast i8 %mask to <8 x i1>
1955 %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Zero-masked broadcast of the low float pair across a 512-bit vector: %vec is
; shuffled with <0,1> repeated eight times and selected against
; zeroinitializer with the 16-bit mask.
; The expected assembly is a single zero-masked vbroadcastf32x2 into zmm0.
1959 define <16 x float> @test_broadcastf32x2_v16f32_z(<16 x float> %vec, i16 %mask) {
1960 ; CHECK-LABEL: test_broadcastf32x2_v16f32_z:
1962 ; CHECK-NEXT: kmovd %edi, %k1
1963 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1965 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1966 %mask.cast = bitcast i16 %mask to <16 x i1>
1967 %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> zeroinitializer
1968 ret <16 x float> %res
; Merge-masked broadcast of the low i32 pair across a 512-bit vector: %vec is
; shuffled with <0,1> repeated eight times and selected against %passthru with
; the 16-bit mask.
; The expected assembly is a masked vbroadcasti32x2 into zmm1 followed by a
; copy of zmm1 into zmm0.
1971 define <16 x i32> @test_broadcasti32x2_v16i32(<16 x i32> %vec, <16 x i32> %passthru, i16 %mask) {
1972 ; CHECK-LABEL: test_broadcasti32x2_v16i32:
1974 ; CHECK-NEXT: kmovd %edi, %k1
1975 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1976 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1978 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1979 %mask.cast = bitcast i16 %mask to <16 x i1>
1980 %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> %passthru
; Merge-masked broadcast of the low float pair across a 512-bit vector: %vec
; is shuffled with <0,1> repeated eight times and selected against %passthru
; with the 16-bit mask.
; The expected assembly is a masked vbroadcastf32x2 into zmm1 followed by a
; copy of zmm1 into zmm0.
1984 define <16 x float> @test_broadcastf32x2_v16f32(<16 x float> %vec, <16 x float> %passthru, i16 %mask) {
1985 ; CHECK-LABEL: test_broadcastf32x2_v16f32:
1987 ; CHECK-NEXT: kmovd %edi, %k1
1988 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
1989 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1991 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1992 %mask.cast = bitcast i16 %mask to <16 x i1>
1993 %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> %passthru
1994 ret <16 x float> %res
; Zero-masked broadcast of the low i32 pair across a 512-bit vector: %vec is
; shuffled with <0,1> repeated eight times and selected against
; zeroinitializer with the 16-bit mask.
; The expected assembly is a single zero-masked vbroadcasti32x2 into zmm0.
1997 define <16 x i32> @test_broadcasti32x2_v16i32_z(<16 x i32> %vec, i16 %mask) {
1998 ; CHECK-LABEL: test_broadcasti32x2_v16i32_z:
2000 ; CHECK-NEXT: kmovd %edi, %k1
2001 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2003 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
2004 %mask.cast = bitcast i16 %mask to <16 x i1>
2005 %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked two-input byte shuffle with consecutive indices 1..16 (bytes
; 1-15 of %a followed by byte 0 of %b), selected against %passthru with the
; 16-bit mask.
; The expected assembly is a masked vpalignr into xmm2 followed by a copy of
; xmm2 into xmm0.
2009 define <16 x i8> @mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) {
2010 ; CHECK-LABEL: mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16:
2012 ; CHECK-NEXT: kmovd %edi, %k1
2013 ; CHECK-NEXT: vpalignr {{.*#+}} xmm2 {%k1} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2014 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
2016 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
2017 %mask.cast = bitcast i16 %mask to <16 x i1>
2018 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru
; Zero-masked two-input byte shuffle with consecutive indices 1..16 (bytes
; 1-15 of %a followed by byte 0 of %b), selected against zeroinitializer with
; the 16-bit mask.
; The expected assembly is a single zero-masked vpalignr into xmm0.
2022 define <16 x i8> @maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2023 ; CHECK-LABEL: maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16:
2025 ; CHECK-NEXT: kmovd %edi, %k1
2026 ; CHECK-NEXT: vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
2028 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
2029 %mask.cast = bitcast i16 %mask to <16 x i1>
2030 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer
; Merge-masked two-input byte shuffle with consecutive indices 4..19 (bytes
; 4-15 of %a followed by bytes 0-3 of %b), selected against %passthru with the
; 16-bit mask.
; The expected assembly is a masked vpalignr into xmm2 followed by a copy of
; xmm2 into xmm0.
2034 define <16 x i8> @mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) {
2035 ; CHECK-LABEL: mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19:
2037 ; CHECK-NEXT: kmovd %edi, %k1
2038 ; CHECK-NEXT: vpalignr {{.*#+}} xmm2 {%k1} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
2039 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
2041 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
2042 %mask.cast = bitcast i16 %mask to <16 x i1>
2043 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru
; Zero-masked two-input byte shuffle with consecutive indices 4..19 (bytes
; 4-15 of %a followed by bytes 0-3 of %b), selected against zeroinitializer
; with the 16-bit mask.
; The expected assembly is a single zero-masked vpalignr into xmm0.
2047 define <16 x i8> @maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2048 ; CHECK-LABEL: maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19:
2050 ; CHECK-NEXT: kmovd %edi, %k1
2051 ; CHECK-NEXT: vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
2053 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
2054 %mask.cast = bitcast i16 %mask to <16 x i1>
2055 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer
; Merge-masked two-input byte shuffle with consecutive indices 8..23 (bytes
; 8-15 of %a followed by bytes 0-7 of %b), selected against %passthru with the
; 16-bit mask.
; The expected assembly is a masked vpalignr into xmm2 followed by a copy of
; xmm2 into xmm0.
2059 define <16 x i8> @mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) {
2060 ; CHECK-LABEL: mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23:
2062 ; CHECK-NEXT: kmovd %edi, %k1
2063 ; CHECK-NEXT: vpalignr {{.*#+}} xmm2 {%k1} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
2064 ; CHECK-NEXT: vmovdqa %xmm2, %xmm0
2066 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
2067 %mask.cast = bitcast i16 %mask to <16 x i1>
2068 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru
; Zero-masked two-input byte shuffle with consecutive indices 8..23 (bytes
; 8-15 of %a followed by bytes 0-7 of %b), selected against zeroinitializer
; with the 16-bit mask.
; The expected assembly is a single zero-masked vpalignr into xmm0.
2072 define <16 x i8> @maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
2073 ; CHECK-LABEL: maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23:
2075 ; CHECK-NEXT: kmovd %edi, %k1
2076 ; CHECK-NEXT: vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
2078 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
2079 %mask.cast = bitcast i16 %mask to <16 x i1>
2080 %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer