1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X32
3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X64
5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c
; Unmasked shuffle of two <16 x float> inputs: the mask keeps elements 0-7 of
; %__A and then repeats elements 0-3 of %__B twice (indices 16-19). Both
; configurations expect a single unmasked vshuff64x2.
8 define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x float> %__B) {
9 ; X32-LABEL: test_mm512_shuffle_f32x4:
10 ; X32: # BB#0: # %entry
11 ; X32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
14 ; X64-LABEL: test_mm512_shuffle_f32x4:
15 ; X64: # BB#0: # %entry
16 ; X64-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
19 %shuffle = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
20 ret <16 x float> %shuffle
; Merge-masked form of the f32x4 shuffle: %__U is bitcast to <16 x i1> and
; selects, per element, between the shuffle result and the pass-through %__W.
; The expected code moves the mask into %k1 (from the stack in the 32-bit
; configuration, from %edi in the 64-bit one) and emits vshuff32x4 under {%k1}.
24 define <16 x float> @test_mm512_mask_shuffle_f32x4(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
25 ; X32-LABEL: test_mm512_mask_shuffle_f32x4:
26 ; X32: # BB#0: # %entry
27 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
28 ; X32-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
31 ; X64-LABEL: test_mm512_mask_shuffle_f32x4:
32 ; X64: # BB#0: # %entry
33 ; X64-NEXT: kmovw %edi, %k1
34 ; X64-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
37 %shuffle = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
38 %0 = bitcast i16 %__U to <16 x i1>
39 %1 = select <16 x i1> %0, <16 x float> %shuffle, <16 x float> %__W
; Zero-masked form of the f32x4 shuffle: elements whose bit in %__U is clear
; are taken from zeroinitializer instead of the shuffle result. The expected
; instruction is vshuff32x4 with both {%k1} and {z}.
43 define <16 x float> @test_mm512_maskz_shuffle_f32x4(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
44 ; X32-LABEL: test_mm512_maskz_shuffle_f32x4:
45 ; X32: # BB#0: # %entry
46 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
47 ; X32-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
50 ; X64-LABEL: test_mm512_maskz_shuffle_f32x4:
51 ; X64: # BB#0: # %entry
52 ; X64-NEXT: kmovw %edi, %k1
53 ; X64-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
56 %shuffle = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
57 %0 = bitcast i16 %__U to <16 x i1>
58 %1 = select <16 x i1> %0, <16 x float> %shuffle, <16 x float> zeroinitializer
; Unmasked shuffle of two <8 x double> inputs: the mask keeps elements 0-3 of
; %__A and then repeats elements 0-1 of %__B twice (indices 8, 9). Both
; configurations expect a single unmasked vshuff64x2.
62 define <8 x double> @test_mm512_shuffle_f64x2(<8 x double> %__A, <8 x double> %__B) {
63 ; X32-LABEL: test_mm512_shuffle_f64x2:
64 ; X32: # BB#0: # %entry
65 ; X32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
68 ; X64-LABEL: test_mm512_shuffle_f64x2:
69 ; X64: # BB#0: # %entry
70 ; X64-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
73 %shuffle = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
74 ret <8 x double> %shuffle
; Merge-masked form of the f64x2 shuffle: the i8 mask %__U is bitcast to
; <8 x i1> and selects between the shuffle result and the pass-through %__W.
; The 32-bit configuration is expected to load the mask byte into %al before
; kmovw; the 64-bit one moves %edi straight into %k1.
77 define <8 x double> @test_mm512_mask_shuffle_f64x2(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
78 ; X32-LABEL: test_mm512_mask_shuffle_f64x2:
79 ; X32: # BB#0: # %entry
80 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
81 ; X32-NEXT: kmovw %eax, %k1
82 ; X32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
85 ; X64-LABEL: test_mm512_mask_shuffle_f64x2:
86 ; X64: # BB#0: # %entry
87 ; X64-NEXT: kmovw %edi, %k1
88 ; X64-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
91 %shuffle = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
92 %0 = bitcast i8 %__U to <8 x i1>
93 %1 = select <8 x i1> %0, <8 x double> %shuffle, <8 x double> %__W
; Zero-masked form of the f64x2 shuffle: elements whose bit in the i8 mask
; %__U is clear come from zeroinitializer. The expected instruction is
; vshuff64x2 with both {%k1} and {z}.
97 define <8 x double> @test_mm512_maskz_shuffle_f64x2(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
98 ; X32-LABEL: test_mm512_maskz_shuffle_f64x2:
99 ; X32: # BB#0: # %entry
100 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
101 ; X32-NEXT: kmovw %eax, %k1
102 ; X32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
105 ; X64-LABEL: test_mm512_maskz_shuffle_f64x2:
106 ; X64: # BB#0: # %entry
107 ; X64-NEXT: kmovw %edi, %k1
108 ; X64-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
111 %shuffle = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
112 %0 = bitcast i8 %__U to <8 x i1>
113 %1 = select <8 x i1> %0, <8 x double> %shuffle, <8 x double> zeroinitializer
; Unmasked integer shuffle expressed directly on <8 x i64>: elements 0-3 of
; %__A followed by elements 0-1 of %__B twice. Both configurations expect a
; single unmasked vshufi64x2.
117 define <8 x i64> @test_mm512_shuffle_i32x4(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
118 ; X32-LABEL: test_mm512_shuffle_i32x4:
119 ; X32: # BB#0: # %entry
120 ; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
123 ; X64-LABEL: test_mm512_shuffle_i32x4:
124 ; X64: # BB#0: # %entry
125 ; X64-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
128 %shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
129 ret <8 x i64> %shuffle
; Merge-masked i32x4 shuffle: the shuffle is done on <8 x i64>, then both the
; result and the pass-through %__W are bitcast to <16 x i32> so the i16 mask
; %__U can select at 32-bit granularity; the selection is bitcast back to
; <8 x i64>. The expected instruction is vshufi32x4 under {%k1}.
132 define <8 x i64> @test_mm512_mask_shuffle_i32x4(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
133 ; X32-LABEL: test_mm512_mask_shuffle_i32x4:
134 ; X32: # BB#0: # %entry
135 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
136 ; X32-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
139 ; X64-LABEL: test_mm512_mask_shuffle_i32x4:
140 ; X64: # BB#0: # %entry
141 ; X64-NEXT: kmovw %edi, %k1
142 ; X64-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
145 %shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
146 %0 = bitcast <8 x i64> %shuffle to <16 x i32>
147 %1 = bitcast <8 x i64> %__W to <16 x i32>
148 %2 = bitcast i16 %__U to <16 x i1>
149 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
150 %4 = bitcast <16 x i32> %3 to <8 x i64>
; Zero-masked i32x4 shuffle: the <8 x i64> shuffle result is bitcast to
; <16 x i32>, selected against zeroinitializer with the i16 mask %__U, and
; bitcast back to <8 x i64>. The expected instruction is vshufi32x4 with
; {%k1} and {z}.
154 define <8 x i64> @test_mm512_maskz_shuffle_i32x4(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
155 ; X32-LABEL: test_mm512_maskz_shuffle_i32x4:
156 ; X32: # BB#0: # %entry
157 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
158 ; X32-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
161 ; X64-LABEL: test_mm512_maskz_shuffle_i32x4:
162 ; X64: # BB#0: # %entry
163 ; X64-NEXT: kmovw %edi, %k1
164 ; X64-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
167 %shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
168 %0 = bitcast <8 x i64> %shuffle to <16 x i32>
169 %1 = bitcast i16 %__U to <16 x i1>
170 %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer
171 %3 = bitcast <16 x i32> %2 to <8 x i64>
; Unmasked i64x2 shuffle on <8 x i64>: elements 0-3 of %__A followed by
; elements 0-1 of %__B twice. Both configurations expect a single unmasked
; vshufi64x2.
175 define <8 x i64> @test_mm512_shuffle_i64x2(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
176 ; X32-LABEL: test_mm512_shuffle_i64x2:
177 ; X32: # BB#0: # %entry
178 ; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
181 ; X64-LABEL: test_mm512_shuffle_i64x2:
182 ; X64: # BB#0: # %entry
183 ; X64-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
186 %shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
187 ret <8 x i64> %shuffle
; Merge-masked i64x2 shuffle: the i8 mask %__U is bitcast to <8 x i1> and
; selects between the shuffle result and the pass-through %__W. The expected
; instruction is vshufi64x2 under {%k1}; the 32-bit configuration loads the
; mask byte into %al first.
190 define <8 x i64> @test_mm512_mask_shuffle_i64x2(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
191 ; X32-LABEL: test_mm512_mask_shuffle_i64x2:
192 ; X32: # BB#0: # %entry
193 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
194 ; X32-NEXT: kmovw %eax, %k1
195 ; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
198 ; X64-LABEL: test_mm512_mask_shuffle_i64x2:
199 ; X64: # BB#0: # %entry
200 ; X64-NEXT: kmovw %edi, %k1
201 ; X64-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
204 %shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
205 %0 = bitcast i8 %__U to <8 x i1>
206 %1 = select <8 x i1> %0, <8 x i64> %shuffle, <8 x i64> %__W
; Zero-masked i64x2 shuffle: elements whose bit in the i8 mask %__U is clear
; come from zeroinitializer. The expected instruction is vshufi64x2 with
; {%k1} and {z}.
210 define <8 x i64> @test_mm512_maskz_shuffle_i64x2(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
211 ; X32-LABEL: test_mm512_maskz_shuffle_i64x2:
212 ; X32: # BB#0: # %entry
213 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
214 ; X32-NEXT: kmovw %eax, %k1
215 ; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
218 ; X64-LABEL: test_mm512_maskz_shuffle_i64x2:
219 ; X64: # BB#0: # %entry
220 ; X64-NEXT: kmovw %edi, %k1
221 ; X64-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
224 %shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
225 %0 = bitcast i8 %__U to <8 x i1>
226 %1 = select <8 x i1> %0, <8 x i64> %shuffle, <8 x i64> zeroinitializer
; "Test-not" on 32-bit elements: ANDs the two inputs, views the result as
; <16 x i32>, compares each element for equality with zero and packs the
; <16 x i1> result into an i16. The and+icmp eq pair is expected to fold into
; one vptestnmd, followed by kmovw, a movzwl zero-extension and vzeroupper.
231 define zeroext i16 @test_mm512_testn_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) {
232 ; X32-LABEL: test_mm512_testn_epi32_mask:
233 ; X32: # BB#0: # %entry
234 ; X32-NEXT: vptestnmd %zmm0, %zmm1, %k0
235 ; X32-NEXT: kmovw %k0, %eax
236 ; X32-NEXT: movzwl %ax, %eax
237 ; X32-NEXT: vzeroupper
240 ; X64-LABEL: test_mm512_testn_epi32_mask:
241 ; X64: # BB#0: # %entry
242 ; X64-NEXT: vptestnmd %zmm0, %zmm1, %k0
243 ; X64-NEXT: kmovw %k0, %eax
244 ; X64-NEXT: movzwl %ax, %eax
245 ; X64-NEXT: vzeroupper
248 %and1.i.i = and <8 x i64> %__B, %__A
249 %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
250 %1 = icmp eq <16 x i32> %0, zeroinitializer
251 %2 = bitcast <16 x i1> %1 to i16
; Masked "test-not" on 32-bit elements: same and + icmp eq zero sequence as
; the unmasked test, with the <16 x i1> compare result additionally ANDed
; with the i16 mask %__U. The expected code is a vptestnmd predicated on
; {%k1}, then kmovw, movzwl and vzeroupper.
255 define zeroext i16 @test_mm512_mask_testn_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
256 ; X32-LABEL: test_mm512_mask_testn_epi32_mask:
257 ; X32: # BB#0: # %entry
258 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
259 ; X32-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
260 ; X32-NEXT: kmovw %k0, %eax
261 ; X32-NEXT: movzwl %ax, %eax
262 ; X32-NEXT: vzeroupper
265 ; X64-LABEL: test_mm512_mask_testn_epi32_mask:
266 ; X64: # BB#0: # %entry
267 ; X64-NEXT: kmovw %edi, %k1
268 ; X64-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
269 ; X64-NEXT: kmovw %k0, %eax
270 ; X64-NEXT: movzwl %ax, %eax
271 ; X64-NEXT: vzeroupper
274 %and1.i.i = and <8 x i64> %__B, %__A
275 %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
276 %1 = icmp eq <16 x i32> %0, zeroinitializer
277 %2 = bitcast i16 %__U to <16 x i1>
278 %3 = and <16 x i1> %1, %2
279 %4 = bitcast <16 x i1> %3 to i16
; "Test-not" on 64-bit elements: ANDs the two <8 x i64> inputs, compares each
; element for equality with zero and packs the <8 x i1> result into an i8.
; The expected code is vptestnmq, kmovw, a movzbl zero-extension and
; vzeroupper.
283 define zeroext i8 @test_mm512_testn_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) {
284 ; X32-LABEL: test_mm512_testn_epi64_mask:
285 ; X32: # BB#0: # %entry
286 ; X32-NEXT: vptestnmq %zmm0, %zmm1, %k0
287 ; X32-NEXT: kmovw %k0, %eax
288 ; X32-NEXT: movzbl %al, %eax
289 ; X32-NEXT: vzeroupper
292 ; X64-LABEL: test_mm512_testn_epi64_mask:
293 ; X64: # BB#0: # %entry
294 ; X64-NEXT: vptestnmq %zmm0, %zmm1, %k0
295 ; X64-NEXT: kmovw %k0, %eax
296 ; X64-NEXT: movzbl %al, %eax
297 ; X64-NEXT: vzeroupper
300 %and1.i.i = and <8 x i64> %__B, %__A
301 %0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer
302 %1 = bitcast <8 x i1> %0 to i8
; Masked "test-not" on 64-bit elements: and + icmp eq zero on <8 x i64>, with
; the <8 x i1> compare result ANDed with the i8 mask %__U before being packed
; into an i8. The expected code is a vptestnmq predicated on {%k1}; the
; 32-bit configuration loads the mask byte into %al before kmovw.
306 define zeroext i8 @test_mm512_mask_testn_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
307 ; X32-LABEL: test_mm512_mask_testn_epi64_mask:
308 ; X32: # BB#0: # %entry
309 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
310 ; X32-NEXT: kmovw %eax, %k1
311 ; X32-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1}
312 ; X32-NEXT: kmovw %k0, %eax
313 ; X32-NEXT: movzbl %al, %eax
314 ; X32-NEXT: vzeroupper
317 ; X64-LABEL: test_mm512_mask_testn_epi64_mask:
318 ; X64: # BB#0: # %entry
319 ; X64-NEXT: kmovw %edi, %k1
320 ; X64-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1}
321 ; X64-NEXT: kmovw %k0, %eax
322 ; X64-NEXT: movzbl %al, %eax
323 ; X64-NEXT: vzeroupper
326 %and1.i.i = and <8 x i64> %__B, %__A
327 %0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer
328 %1 = bitcast i8 %__U to <8 x i1>
329 %2 = and <8 x i1> %0, %1
330 %3 = bitcast <8 x i1> %2 to i8
; Masked "test" on 32-bit elements: ANDs the inputs, views the result as
; <16 x i32>, compares each element for inequality with zero (icmp ne) and
; ANDs the result with the i16 mask %__U. The expected code is a vptestmd
; predicated on {%k1}, then kmovw, movzwl and vzeroupper.
334 define zeroext i16 @test_mm512_mask_test_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
335 ; X32-LABEL: test_mm512_mask_test_epi32_mask:
336 ; X32: # BB#0: # %entry
337 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
338 ; X32-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
339 ; X32-NEXT: kmovw %k0, %eax
340 ; X32-NEXT: movzwl %ax, %eax
341 ; X32-NEXT: vzeroupper
344 ; X64-LABEL: test_mm512_mask_test_epi32_mask:
345 ; X64: # BB#0: # %entry
346 ; X64-NEXT: kmovw %edi, %k1
347 ; X64-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
348 ; X64-NEXT: kmovw %k0, %eax
349 ; X64-NEXT: movzwl %ax, %eax
350 ; X64-NEXT: vzeroupper
353 %and1.i.i = and <8 x i64> %__B, %__A
354 %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
355 %1 = icmp ne <16 x i32> %0, zeroinitializer
356 %2 = bitcast i16 %__U to <16 x i1>
357 %3 = and <16 x i1> %1, %2
358 %4 = bitcast <16 x i1> %3 to i16
; Masked "test" on 64-bit elements: and + icmp ne zero on <8 x i64>, with the
; <8 x i1> compare result ANDed with the i8 mask %__U before being packed
; into an i8. The expected code is a vptestmq predicated on {%k1}, then
; kmovw, movzbl and vzeroupper.
362 define zeroext i8 @test_mm512_mask_test_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
363 ; X32-LABEL: test_mm512_mask_test_epi64_mask:
364 ; X32: # BB#0: # %entry
365 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
366 ; X32-NEXT: kmovw %eax, %k1
367 ; X32-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
368 ; X32-NEXT: kmovw %k0, %eax
369 ; X32-NEXT: movzbl %al, %eax
370 ; X32-NEXT: vzeroupper
373 ; X64-LABEL: test_mm512_mask_test_epi64_mask:
374 ; X64: # BB#0: # %entry
375 ; X64-NEXT: kmovw %edi, %k1
376 ; X64-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
377 ; X64-NEXT: kmovw %k0, %eax
378 ; X64-NEXT: movzbl %al, %eax
379 ; X64-NEXT: vzeroupper
382 %and1.i.i = and <8 x i64> %__B, %__A
383 %0 = icmp ne <8 x i64> %and1.i.i, zeroinitializer
384 %1 = bitcast i8 %__U to <8 x i1>
385 %2 = and <8 x i1> %0, %1
386 %3 = bitcast <8 x i1> %2 to i8
; Merge-masked splat of the scalar i32 %__A: insertelement + an all-zero
; shuffle mask broadcast it across <16 x i32>, and the i16 mask %__M selects
; between the splat and %__O (viewed as <16 x i32>). The expected code is a
; masked vpbroadcastd from a general-purpose register.
390 define <8 x i64> @test_mm512_mask_set1_epi32(<8 x i64> %__O, i16 zeroext %__M, i32 %__A) {
391 ; X32-LABEL: test_mm512_mask_set1_epi32:
392 ; X32: # BB#0: # %entry
393 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
394 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
395 ; X32-NEXT: vpbroadcastd %eax, %zmm0 {%k1}
398 ; X64-LABEL: test_mm512_mask_set1_epi32:
399 ; X64: # BB#0: # %entry
400 ; X64-NEXT: kmovw %edi, %k1
401 ; X64-NEXT: vpbroadcastd %esi, %zmm0 {%k1}
404 %vecinit.i.i = insertelement <16 x i32> undef, i32 %__A, i32 0
405 %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
406 %0 = bitcast <8 x i64> %__O to <16 x i32>
407 %1 = bitcast i16 %__M to <16 x i1>
408 %2 = select <16 x i1> %1, <16 x i32> %vecinit15.i.i, <16 x i32> %0
409 %3 = bitcast <16 x i32> %2 to <8 x i64>
; Zero-masked splat of the scalar i32 %__A: the value is broadcast across
; <16 x i32> and the i16 mask %__M selects between the splat and
; zeroinitializer. The expected code is vpbroadcastd from a general-purpose
; register with {%k1} and {z}.
413 define <8 x i64> @test_mm512_maskz_set1_epi32(i16 zeroext %__M, i32 %__A) {
414 ; X32-LABEL: test_mm512_maskz_set1_epi32:
415 ; X32: # BB#0: # %entry
416 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
417 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
418 ; X32-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
421 ; X64-LABEL: test_mm512_maskz_set1_epi32:
422 ; X64: # BB#0: # %entry
423 ; X64-NEXT: kmovw %edi, %k1
424 ; X64-NEXT: vpbroadcastd %esi, %zmm0 {%k1} {z}
427 %vecinit.i.i = insertelement <16 x i32> undef, i32 %__A, i32 0
428 %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
429 %0 = bitcast i16 %__M to <16 x i1>
430 %1 = select <16 x i1> %0, <16 x i32> %vecinit15.i.i, <16 x i32> zeroinitializer
431 %2 = bitcast <16 x i32> %1 to <8 x i64>
; Merge-masked splat of the scalar i64 %__A across <8 x i64>, selected
; against %__O with the i8 mask %__M. The two configurations differ: the
; 64-bit one expects a single masked vpbroadcastq from %rsi, while the 32-bit
; one expects two 32-bit loads of the scalar that are assembled with
; vmovd/vpinsrd/vinserti128 and finished by a masked vinserti64x4.
435 define <8 x i64> @test_mm512_mask_set1_epi64(<8 x i64> %__O, i8 zeroext %__M, i64 %__A) {
436 ; X32-LABEL: test_mm512_mask_set1_epi64:
437 ; X32: # BB#0: # %entry
438 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
439 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
440 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
441 ; X32-NEXT: vmovd %edx, %xmm1
442 ; X32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
443 ; X32-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
444 ; X32-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1
445 ; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
446 ; X32-NEXT: kmovw %eax, %k1
447 ; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm0 {%k1}
450 ; X64-LABEL: test_mm512_mask_set1_epi64:
451 ; X64: # BB#0: # %entry
452 ; X64-NEXT: kmovw %edi, %k1
453 ; X64-NEXT: vpbroadcastq %rsi, %zmm0 {%k1}
456 %vecinit.i.i = insertelement <8 x i64> undef, i64 %__A, i32 0
457 %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
458 %0 = bitcast i8 %__M to <8 x i1>
459 %1 = select <8 x i1> %0, <8 x i64> %vecinit7.i.i, <8 x i64> %__O
; Zero-masked splat of the scalar i64 %__A across <8 x i64>, selected against
; zeroinitializer with the i8 mask %__M. The 64-bit configuration expects a
; single vpbroadcastq from %rsi with {%k1} {z}; the 32-bit one expects the
; scalar to be assembled from two 32-bit loads (vmovd/vpinsrd/vinserti128)
; and finished by a zero-masked vinserti64x4.
463 define <8 x i64> @test_mm512_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) {
464 ; X32-LABEL: test_mm512_maskz_set1_epi64:
465 ; X32: # BB#0: # %entry
466 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
467 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
468 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
469 ; X32-NEXT: vmovd %edx, %xmm0
470 ; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
471 ; X32-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
472 ; X32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
473 ; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
474 ; X32-NEXT: kmovw %eax, %k1
475 ; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z}
478 ; X64-LABEL: test_mm512_maskz_set1_epi64:
479 ; X64: # BB#0: # %entry
480 ; X64-NEXT: kmovw %edi, %k1
481 ; X64-NEXT: vpbroadcastq %rsi, %zmm0 {%k1} {z}
484 %vecinit.i.i = insertelement <8 x i64> undef, i64 %__A, i32 0
485 %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
486 %0 = bitcast i8 %__M to <8 x i1>
487 %1 = select <8 x i1> %0, <8 x i64> %vecinit7.i.i, <8 x i64> zeroinitializer
; Unmasked broadcast of the low 32-bit element: %a0 is viewed as <4 x i32>,
; element 0 is splatted to <16 x i32> by an all-zero shuffle mask, and the
; result is bitcast to <8 x i64>. Both configurations expect vbroadcastss
; (the floating-point form) for this unmasked integer splat.
492 define <8 x i64> @test_mm512_broadcastd_epi32(<2 x i64> %a0) {
493 ; X32-LABEL: test_mm512_broadcastd_epi32:
495 ; X32-NEXT: vbroadcastss %xmm0, %zmm0
498 ; X64-LABEL: test_mm512_broadcastd_epi32:
500 ; X64-NEXT: vbroadcastss %xmm0, %zmm0
502 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
503 %res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <16 x i32> zeroinitializer
504 %res1 = bitcast <16 x i32> %res0 to <8 x i64>
; Merge-masked broadcast of the low 32-bit element of %a2: the splat is
; selected against %a0 (viewed as <16 x i32>) using the i16 mask %a1. The
; expected instruction is vpbroadcastd from %xmm1 under {%k1}.
508 define <8 x i64> @test_mm512_mask_broadcastd_epi32(<8 x i64> %a0, i16 %a1, <2 x i64> %a2) {
509 ; X32-LABEL: test_mm512_mask_broadcastd_epi32:
511 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
512 ; X32-NEXT: vpbroadcastd %xmm1, %zmm0 {%k1}
515 ; X64-LABEL: test_mm512_mask_broadcastd_epi32:
517 ; X64-NEXT: kmovw %edi, %k1
518 ; X64-NEXT: vpbroadcastd %xmm1, %zmm0 {%k1}
520 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
521 %arg1 = bitcast i16 %a1 to <16 x i1>
522 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
523 %res0 = shufflevector <4 x i32> %arg2, <4 x i32> undef, <16 x i32> zeroinitializer
524 %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
525 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Zero-masked broadcast of the low 32-bit element of %a1: the splat is
; selected against zeroinitializer using the i16 mask %a0. The expected
; instruction is vpbroadcastd with {%k1} and {z}.
529 define <8 x i64> @test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) {
530 ; X32-LABEL: test_mm512_maskz_broadcastd_epi32:
532 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
533 ; X32-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
536 ; X64-LABEL: test_mm512_maskz_broadcastd_epi32:
538 ; X64-NEXT: kmovw %edi, %k1
539 ; X64-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
541 %arg0 = bitcast i16 %a0 to <16 x i1>
542 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
543 %res0 = shufflevector <4 x i32> %arg1, <4 x i32> undef, <16 x i32> zeroinitializer
544 %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
545 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Unmasked broadcast of element 0 of a <2 x i64> to <8 x i64> via an all-zero
; shuffle mask. Both configurations expect vbroadcastsd (the floating-point
; form) for this unmasked integer splat.
549 define <8 x i64> @test_mm512_broadcastq_epi64(<2 x i64> %a0) {
550 ; X32-LABEL: test_mm512_broadcastq_epi64:
552 ; X32-NEXT: vbroadcastsd %xmm0, %zmm0
555 ; X64-LABEL: test_mm512_broadcastq_epi64:
557 ; X64-NEXT: vbroadcastsd %xmm0, %zmm0
559 %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <8 x i32> zeroinitializer
; Merge-masked broadcast of element 0 of %a2 to <8 x i64>, selected against
; the pass-through %a0 with the i8 mask %a1. The expected instruction is
; vpbroadcastq from %xmm1 under {%k1}; the 32-bit configuration loads the
; mask byte into %al first.
563 define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i64> %a2) {
564 ; X32-LABEL: test_mm512_mask_broadcastq_epi64:
566 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
567 ; X32-NEXT: kmovw %eax, %k1
568 ; X32-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1}
571 ; X64-LABEL: test_mm512_mask_broadcastq_epi64:
573 ; X64-NEXT: kmovw %edi, %k1
574 ; X64-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1}
576 %arg1 = bitcast i8 %a1 to <8 x i1>
577 %res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <8 x i32> zeroinitializer
578 %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
; Zero-masked broadcast of element 0 of %a1 to <8 x i64>, selected against
; zeroinitializer with the i8 mask %a0. The expected instruction is
; vpbroadcastq with {%k1} and {z}.
582 define <8 x i64> @test_mm512_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
583 ; X32-LABEL: test_mm512_maskz_broadcastq_epi64:
585 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
586 ; X32-NEXT: kmovw %eax, %k1
587 ; X32-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
590 ; X64-LABEL: test_mm512_maskz_broadcastq_epi64:
592 ; X64-NEXT: kmovw %edi, %k1
593 ; X64-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
595 %arg0 = bitcast i8 %a0 to <8 x i1>
596 %res0 = shufflevector <2 x i64> %a1, <2 x i64> undef, <8 x i32> zeroinitializer
597 %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
; Unmasked broadcast of element 0 of a <2 x double> to <8 x double> via an
; all-zero shuffle mask. Both configurations expect vbroadcastsd.
601 define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a0) {
602 ; X32-LABEL: test_mm512_broadcastsd_pd:
604 ; X32-NEXT: vbroadcastsd %xmm0, %zmm0
607 ; X64-LABEL: test_mm512_broadcastsd_pd:
609 ; X64-NEXT: vbroadcastsd %xmm0, %zmm0
611 %res = shufflevector <2 x double> %a0, <2 x double> undef, <8 x i32> zeroinitializer
612 ret <8 x double> %res
; Merge-masked broadcast of element 0 of %a2 to <8 x double>: the i8 mask %a1
; selects between the splat and the pass-through %a0. The expected
; instruction is vbroadcastsd from %xmm1 under {%k1}.
615 define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2 x double> %a2) {
616 ; X32-LABEL: test_mm512_mask_broadcastsd_pd:
618 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
619 ; X32-NEXT: kmovw %eax, %k1
620 ; X32-NEXT: vbroadcastsd %xmm1, %zmm0 {%k1}
623 ; X64-LABEL: test_mm512_mask_broadcastsd_pd:
625 ; X64-NEXT: kmovw %edi, %k1
626 ; X64-NEXT: vbroadcastsd %xmm1, %zmm0 {%k1}
628 %arg1 = bitcast i8 %a1 to <8 x i1>
629 %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <8 x i32> zeroinitializer
630 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
631 ret <8 x double> %res1
; Zero-masked broadcast of element 0 of %a1 to <8 x double>: the i8 mask %a0
; selects between the splat and zeroinitializer. The expected instruction is
; vbroadcastsd with {%k1} and {z}.
634 define <8 x double> @test_mm512_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
635 ; X32-LABEL: test_mm512_maskz_broadcastsd_pd:
637 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
638 ; X32-NEXT: kmovw %eax, %k1
639 ; X32-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
642 ; X64-LABEL: test_mm512_maskz_broadcastsd_pd:
644 ; X64-NEXT: kmovw %edi, %k1
645 ; X64-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
647 %arg0 = bitcast i8 %a0 to <8 x i1>
648 %res0 = shufflevector <2 x double> %a1, <2 x double> undef, <8 x i32> zeroinitializer
649 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
650 ret <8 x double> %res1
; Unmasked broadcast of element 0 of a <4 x float> to <16 x float> via an
; all-zero shuffle mask. Both configurations expect vbroadcastss.
653 define <16 x float> @test_mm512_broadcastss_ps(<4 x float> %a0) {
654 ; X32-LABEL: test_mm512_broadcastss_ps:
656 ; X32-NEXT: vbroadcastss %xmm0, %zmm0
659 ; X64-LABEL: test_mm512_broadcastss_ps:
661 ; X64-NEXT: vbroadcastss %xmm0, %zmm0
663 %res = shufflevector <4 x float> %a0, <4 x float> undef, <16 x i32> zeroinitializer
664 ret <16 x float> %res
; Merge-masked broadcast of element 0 of %a2 to <16 x float>: the i16 mask
; %a1 selects between the splat and the pass-through %a0. The expected
; instruction is vbroadcastss from %xmm1 under {%k1}.
667 define <16 x float> @test_mm512_mask_broadcastss_ps(<16 x float> %a0, i16 %a1, <4 x float> %a2) {
668 ; X32-LABEL: test_mm512_mask_broadcastss_ps:
670 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
671 ; X32-NEXT: vbroadcastss %xmm1, %zmm0 {%k1}
674 ; X64-LABEL: test_mm512_mask_broadcastss_ps:
676 ; X64-NEXT: kmovw %edi, %k1
677 ; X64-NEXT: vbroadcastss %xmm1, %zmm0 {%k1}
679 %arg1 = bitcast i16 %a1 to <16 x i1>
680 %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <16 x i32> zeroinitializer
681 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
682 ret <16 x float> %res1
; Zero-masked broadcast of element 0 of %a1 to <16 x float>: the i16 mask %a0
; selects between the splat and zeroinitializer. The expected instruction is
; vbroadcastss with {%k1} and {z}.
685 define <16 x float> @test_mm512_maskz_broadcastss_ps(i16 %a0, <4 x float> %a1) {
686 ; X32-LABEL: test_mm512_maskz_broadcastss_ps:
688 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
689 ; X32-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
692 ; X64-LABEL: test_mm512_maskz_broadcastss_ps:
694 ; X64-NEXT: kmovw %edi, %k1
695 ; X64-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
697 %arg0 = bitcast i16 %a0 to <16 x i1>
698 %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <16 x i32> zeroinitializer
699 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
700 ret <16 x float> %res1
; Unmasked duplication of the even-indexed doubles: shuffle mask
; <0,0,2,2,4,4,6,6> on a single <8 x double> input. Both configurations
; expect vmovddup.
703 define <8 x double> @test_mm512_movddup_pd(<8 x double> %a0) {
704 ; X32-LABEL: test_mm512_movddup_pd:
706 ; X32-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
709 ; X64-LABEL: test_mm512_movddup_pd:
711 ; X64-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
713 %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
714 ret <8 x double> %res
; Merge-masked even-element duplication of %a2 (mask <0,0,2,2,4,4,6,6>): the
; i8 mask %a1 selects between the shuffle result and the pass-through %a0.
; The expected instruction is vmovddup from %zmm1 under {%k1}.
717 define <8 x double> @test_mm512_mask_movddup_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
718 ; X32-LABEL: test_mm512_mask_movddup_pd:
720 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
721 ; X32-NEXT: kmovw %eax, %k1
722 ; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
725 ; X64-LABEL: test_mm512_mask_movddup_pd:
727 ; X64-NEXT: kmovw %edi, %k1
728 ; X64-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
730 %arg1 = bitcast i8 %a1 to <8 x i1>
731 %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
732 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
733 ret <8 x double> %res1
; Zero-masked even-element duplication of %a1 (mask <0,0,2,2,4,4,6,6>): the
; i8 mask %a0 selects between the shuffle result and zeroinitializer. The
; expected instruction is vmovddup with {%k1} and {z}.
736 define <8 x double> @test_mm512_maskz_movddup_pd(i8 %a0, <8 x double> %a1) {
737 ; X32-LABEL: test_mm512_maskz_movddup_pd:
739 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
740 ; X32-NEXT: kmovw %eax, %k1
741 ; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
744 ; X64-LABEL: test_mm512_maskz_movddup_pd:
746 ; X64-NEXT: kmovw %edi, %k1
747 ; X64-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
749 %arg0 = bitcast i8 %a0 to <8 x i1>
750 %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
751 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
752 ret <8 x double> %res1
; Unmasked duplication of the odd-indexed floats: shuffle mask
; <1,1,3,3,...,15,15> on a single <16 x float> input. Both configurations
; expect vmovshdup.
755 define <16 x float> @test_mm512_movehdup_ps(<16 x float> %a0) {
756 ; X32-LABEL: test_mm512_movehdup_ps:
758 ; X32-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
761 ; X64-LABEL: test_mm512_movehdup_ps:
763 ; X64-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
765 %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
766 ret <16 x float> %res
; Merge-masked odd-element duplication of %a2: the i16 mask %a1 selects
; between the shuffle result and the pass-through %a0. The expected
; instruction is vmovshdup from %zmm1 under {%k1}.
769 define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
770 ; X32-LABEL: test_mm512_mask_movehdup_ps:
772 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
773 ; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
776 ; X64-LABEL: test_mm512_mask_movehdup_ps:
778 ; X64-NEXT: kmovw %edi, %k1
779 ; X64-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
781 %arg1 = bitcast i16 %a1 to <16 x i1>
782 %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
783 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
784 ret <16 x float> %res1
; Zero-masked odd-element duplication of %a1: the i16 mask %a0 selects
; between the shuffle result and zeroinitializer. The expected instruction is
; vmovshdup with {%k1} and {z}.
787 define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
788 ; X32-LABEL: test_mm512_maskz_movehdup_ps:
790 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
791 ; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
794 ; X64-LABEL: test_mm512_maskz_movehdup_ps:
796 ; X64-NEXT: kmovw %edi, %k1
797 ; X64-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
799 %arg0 = bitcast i16 %a0 to <16 x i1>
800 %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
801 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
802 ret <16 x float> %res1
; Unmasked duplication of the even-indexed floats: shuffle mask
; <0,0,2,2,...,14,14> on a single <16 x float> input. Both configurations
; expect vmovsldup.
805 define <16 x float> @test_mm512_moveldup_ps(<16 x float> %a0) {
806 ; X32-LABEL: test_mm512_moveldup_ps:
808 ; X32-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
811 ; X64-LABEL: test_mm512_moveldup_ps:
813 ; X64-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
815 %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
816 ret <16 x float> %res
; Merge-masked even-element duplication of %a2: the i16 mask %a1 selects
; between the shuffle result and the pass-through %a0. The expected
; instruction is vmovsldup from %zmm1 under {%k1}.
819 define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
820 ; X32-LABEL: test_mm512_mask_moveldup_ps:
822 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
823 ; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
826 ; X64-LABEL: test_mm512_mask_moveldup_ps:
828 ; X64-NEXT: kmovw %edi, %k1
829 ; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
831 %arg1 = bitcast i16 %a1 to <16 x i1>
832 %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
833 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
834 ret <16 x float> %res1
; Zero-masked even-element duplication of %a1: the i16 mask %a0 selects
; between the shuffle result and zeroinitializer. The expected instruction is
; vmovsldup with {%k1} and {z}.
837 define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
838 ; X32-LABEL: test_mm512_maskz_moveldup_ps:
840 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
841 ; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
844 ; X64-LABEL: test_mm512_maskz_moveldup_ps:
846 ; X64-NEXT: kmovw %edi, %k1
847 ; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
849 %arg0 = bitcast i16 %a0 to <16 x i1>
850 %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
851 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
852 ret <16 x float> %res1
; Unmasked permute of a single <8 x double> input with the shuffle mask
; <0,1,2,2,4,4,6,6>: the first pair is kept as-is and the low element of each
; remaining pair is duplicated. Both configurations expect vpermilpd.
855 define <8 x double> @test_mm512_permute_pd(<8 x double> %a0) {
856 ; X32-LABEL: test_mm512_permute_pd:
858 ; X32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
861 ; X64-LABEL: test_mm512_permute_pd:
863 ; X64-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
865 %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
866 ret <8 x double> %res
; Merge-masked permute of %a2 with the shuffle mask <0,1,2,2,4,4,6,6>: the i8
; mask %a1 selects between the permuted value and the pass-through %a0. The
; expected instruction is vpermilpd from %zmm1 under {%k1}.
869 define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
870 ; X32-LABEL: test_mm512_mask_permute_pd:
872 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
873 ; X32-NEXT: kmovw %eax, %k1
874 ; X32-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
877 ; X64-LABEL: test_mm512_mask_permute_pd:
879 ; X64-NEXT: kmovw %edi, %k1
880 ; X64-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
882 %arg1 = bitcast i8 %a1 to <8 x i1>
883 %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
884 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
885 ret <8 x double> %res1
; Zero-masked permute of %a1 with the shuffle mask <0,1,2,2,4,4,6,6>: the i8
; mask %a0 selects between the permuted value and zeroinitializer. The
; expected instruction is vpermilpd with {%k1} and {z}.
888 define <8 x double> @test_mm512_maskz_permute_pd(i8 %a0, <8 x double> %a1) {
889 ; X32-LABEL: test_mm512_maskz_permute_pd:
891 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
892 ; X32-NEXT: kmovw %eax, %k1
893 ; X32-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
896 ; X64-LABEL: test_mm512_maskz_permute_pd:
898 ; X64-NEXT: kmovw %edi, %k1
899 ; X64-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
901 %arg0 = bitcast i8 %a0 to <8 x i1>
902 %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
903 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
904 ret <8 x double> %res1
; Unmasked float permute: a single-source shufflevector of %a0 that applies
; the order 2,0,0,0 within each group of four elements (offsets 0, 4, 8, 12).
; Both runs are expected to emit one vpermilps with that decoded order.
907 define <16 x float> @test_mm512_permute_ps(<16 x float> %a0) {
908 ; X32-LABEL: test_mm512_permute_ps:
910 ; X32-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
913 ; X64-LABEL: test_mm512_permute_ps:
915 ; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
917 %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
918 ret <16 x float> %res
; Merge-masked float permute: the i16 %a1 is bitcast to <16 x i1> and selects
; between the permuted %a2 and the passthrough %a0. The checks expect the mask
; loaded into %k1 (directly from the stack on i386, from %edi on x86_64) and a
; vpermilps writing zmm0 under {%k1}.
921 define <16 x float> @test_mm512_mask_permute_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
922 ; X32-LABEL: test_mm512_mask_permute_ps:
924 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
925 ; X32-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
928 ; X64-LABEL: test_mm512_mask_permute_ps:
930 ; X64-NEXT: kmovw %edi, %k1
931 ; X64-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
933 %arg1 = bitcast i16 %a1 to <16 x i1>
934 %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
935 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
936 ret <16 x float> %res1
; Zero-masked float permute: elements whose bit in the i16 mask %a0 is clear
; come from zeroinitializer instead of the permuted %a1. The checks expect a
; vpermilps carrying both {%k1} and {z}.
939 define <16 x float> @test_mm512_maskz_permute_ps(i16 %a0, <16 x float> %a1) {
940 ; X32-LABEL: test_mm512_maskz_permute_ps:
942 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
943 ; X32-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
946 ; X64-LABEL: test_mm512_maskz_permute_ps:
948 ; X64-NEXT: kmovw %edi, %k1
949 ; X64-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
951 %arg0 = bitcast i16 %a0 to <16 x i1>
952 %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
953 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
954 ret <16 x float> %res1
; Unmasked i64 permute: a single-source shufflevector of %a0 that broadcasts
; element 0 over the first four elements and element 4 over the last four.
; The checks expect vpermpd here, whereas the masked i64 variants of this
; test check for vpermq.
957 define <8 x i64> @test_mm512_permutex_epi64(<8 x i64> %a0) {
958 ; X32-LABEL: test_mm512_permutex_epi64:
960 ; X32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
963 ; X64-LABEL: test_mm512_permutex_epi64:
965 ; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
967 %res = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
; Merge-masked i64 permute: the i8 %a1 is bitcast to <8 x i1> and selects
; between the permuted %a2 and the passthrough %a0. The checks expect the mask
; in %k1 and a vpermq writing zmm0 under {%k1}.
971 define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2) {
972 ; X32-LABEL: test_mm512_mask_permutex_epi64:
974 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
975 ; X32-NEXT: kmovw %eax, %k1
976 ; X32-NEXT: vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
979 ; X64-LABEL: test_mm512_mask_permutex_epi64:
981 ; X64-NEXT: kmovw %edi, %k1
982 ; X64-NEXT: vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
984 %arg1 = bitcast i8 %a1 to <8 x i1>
985 %res0 = shufflevector <8 x i64> %a2, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
986 %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
; Zero-masked i64 permute: elements whose bit in the i8 mask %a0 is clear come
; from zeroinitializer instead of the permuted %a1. The checks expect a vpermq
; carrying both {%k1} and {z}.
990 define <8 x i64> @test_mm512_maskz_permutex_epi64(i8 %a0, <8 x i64> %a1) {
991 ; X32-LABEL: test_mm512_maskz_permutex_epi64:
993 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
994 ; X32-NEXT: kmovw %eax, %k1
995 ; X32-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
998 ; X64-LABEL: test_mm512_maskz_permutex_epi64:
1000 ; X64-NEXT: kmovw %edi, %k1
1001 ; X64-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
1003 %arg0 = bitcast i8 %a0 to <8 x i1>
1004 %res0 = shufflevector <8 x i64> %a1, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1005 %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
; Unmasked double permute across four-element halves: a single-source
; shufflevector of %a0 that broadcasts element 0 over elements 0-3 and
; element 4 over elements 4-7. Both runs are expected to emit one vpermpd.
1009 define <8 x double> @test_mm512_permutex_pd(<8 x double> %a0) {
1010 ; X32-LABEL: test_mm512_permutex_pd:
1012 ; X32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
1015 ; X64-LABEL: test_mm512_permutex_pd:
1017 ; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
1019 %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1020 ret <8 x double> %res
; Merge-masked form of the vpermpd test: the i8 %a1 is bitcast to <8 x i1> and
; selects between the permuted %a2 and the passthrough %a0. The checks expect
; the mask in %k1 and a vpermpd writing zmm0 under {%k1}.
1023 define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
1024 ; X32-LABEL: test_mm512_mask_permutex_pd:
1026 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1027 ; X32-NEXT: kmovw %eax, %k1
1028 ; X32-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
1031 ; X64-LABEL: test_mm512_mask_permutex_pd:
1033 ; X64-NEXT: kmovw %edi, %k1
1034 ; X64-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
1036 %arg1 = bitcast i8 %a1 to <8 x i1>
1037 %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1038 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
1039 ret <8 x double> %res1
; Zero-masked form of the vpermpd test: elements whose bit in the i8 mask %a0
; is clear come from zeroinitializer instead of the permuted %a1. The checks
; expect a vpermpd carrying both {%k1} and {z}.
1042 define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) {
1043 ; X32-LABEL: test_mm512_maskz_permutex_pd:
1045 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1046 ; X32-NEXT: kmovw %eax, %k1
1047 ; X32-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
1050 ; X64-LABEL: test_mm512_maskz_permutex_pd:
1052 ; X64-NEXT: kmovw %edi, %k1
1053 ; X64-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
1055 %arg0 = bitcast i8 %a0 to <8 x i1>
1056 %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
1057 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
1058 ret <8 x double> %res1
; Unmasked i32 shuffle: %a0 is viewed as <16 x i32>, shuffled with the order
; 1,0,0,0 inside each group of four elements, and viewed as <8 x i64> again.
; The checks expect vpermilps here, whereas the masked variants of this test
; check for vpshufd.
1061 define <8 x i64> @test_mm512_shuffle_epi32(<8 x i64> %a0) {
1062 ; X32-LABEL: test_mm512_shuffle_epi32:
1064 ; X32-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
1067 ; X64-LABEL: test_mm512_shuffle_epi32:
1069 ; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
1071 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
1072 %res0 = shufflevector <16 x i32> %arg0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
1073 %res1 = bitcast <16 x i32> %res0 to <8 x i64>
; Merge-masked i32 shuffle: passthrough %a0 and source %a2 are viewed as
; <16 x i32>; the i16 %a1, bitcast to <16 x i1>, selects per i32 element
; between the shuffled source and the passthrough. The checks expect the mask
; in %k1 and a vpshufd writing zmm0 under {%k1}.
1077 define <8 x i64> @test_mm512_mask_shuffle_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2) {
1078 ; X32-LABEL: test_mm512_mask_shuffle_epi32:
1080 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1081 ; X32-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
1084 ; X64-LABEL: test_mm512_mask_shuffle_epi32:
1086 ; X64-NEXT: kmovw %edi, %k1
1087 ; X64-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
1089 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
1090 %arg1 = bitcast i16 %a1 to <16 x i1>
1091 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
1092 %res0 = shufflevector <16 x i32> %arg2, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
1093 %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
1094 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Zero-masked i32 shuffle: %a1 is viewed as <16 x i32> and shuffled; elements
; whose bit in the i16 mask %a0 is clear come from zeroinitializer. The checks
; expect a vpshufd carrying both {%k1} and {z}.
1098 define <8 x i64> @test_mm512_maskz_shuffle_epi32(i16 %a0, <8 x i64> %a1) {
1099 ; X32-LABEL: test_mm512_maskz_shuffle_epi32:
1101 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1102 ; X32-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
1105 ; X64-LABEL: test_mm512_maskz_shuffle_epi32:
1107 ; X64-NEXT: kmovw %edi, %k1
1108 ; X64-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
1110 %arg0 = bitcast i16 %a0 to <16 x i1>
1111 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
1112 %res0 = shufflevector <16 x i32> %arg1, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
1113 %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
1114 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Unmasked two-source double shuffle: indices 0-7 of the shufflevector mask
; pick from %a0 and indices 8-15 pick from %a1, so the result alternates
; %a0/%a1 elements as <a0[0],a1[0],a0[3],a1[2],a0[4],a1[4],a0[6],a1[6]>.
; Both runs are expected to emit one vshufpd with that decoded pattern.
1118 define <8 x double> @test_mm512_shuffle_pd(<8 x double> %a0, <8 x double> %a1) {
1119 ; X32-LABEL: test_mm512_shuffle_pd:
1121 ; X32-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1124 ; X64-LABEL: test_mm512_shuffle_pd:
1126 ; X64-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1128 %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
1129 ret <8 x double> %res
; Merge-masked two-source double shuffle: %a2 and %a3 are shuffled, then the
; i8 %a1 (bitcast to <8 x i1>) selects between that result and the passthrough
; %a0. The checks expect the mask in %k1 and a vshufpd writing zmm0 under
; {%k1} with zmm1/zmm2 as sources.
1132 define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
1133 ; X32-LABEL: test_mm512_mask_shuffle_pd:
1135 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1136 ; X32-NEXT: kmovw %eax, %k1
1137 ; X32-NEXT: vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
1140 ; X64-LABEL: test_mm512_mask_shuffle_pd:
1142 ; X64-NEXT: kmovw %edi, %k1
1143 ; X64-NEXT: vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
1145 %arg1 = bitcast i8 %a1 to <8 x i1>
1146 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
1147 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
1148 ret <8 x double> %res1
; Zero-masked two-source double shuffle: %a1 and %a2 are shuffled; elements
; whose bit in the i8 mask %a0 is clear come from zeroinitializer. The checks
; expect a vshufpd carrying both {%k1} and {z}.
1151 define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
1152 ; X32-LABEL: test_mm512_maskz_shuffle_pd:
1154 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1155 ; X32-NEXT: kmovw %eax, %k1
1156 ; X32-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1159 ; X64-LABEL: test_mm512_maskz_shuffle_pd:
1161 ; X64-NEXT: kmovw %edi, %k1
1162 ; X64-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1164 %arg0 = bitcast i8 %a0 to <8 x i1>
1165 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
1166 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
1167 ret <8 x double> %res1
; Unmasked i32 unpack-high: both inputs are viewed as <16 x i32> and the upper
; two elements of every group of four are interleaved (a[2],b[2],a[3],b[3],
; ...), then viewed as <8 x i64> again. The checks expect vunpckhps here,
; whereas the masked variants of this test check for vpunpckhdq.
1170 define <8 x i64> @test_mm512_unpackhi_epi32(<8 x i64> %a0, <8 x i64> %a1) {
1171 ; X32-LABEL: test_mm512_unpackhi_epi32:
1173 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1176 ; X64-LABEL: test_mm512_unpackhi_epi32:
1178 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1180 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
1181 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
1182 %res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1183 %res1 = bitcast <16 x i32> %res0 to <8 x i64>
; Merge-masked i32 unpack-high: %a2 and %a3 (viewed as <16 x i32>) are
; interleaved, then the i16 %a1 (bitcast to <16 x i1>) selects per i32 element
; between that result and the passthrough %a0. The checks expect the mask in
; %k1 and a vpunpckhdq writing zmm0 under {%k1}.
1187 define <8 x i64> @test_mm512_mask_unpackhi_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
1188 ; X32-LABEL: test_mm512_mask_unpackhi_epi32:
1190 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1191 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
1194 ; X64-LABEL: test_mm512_mask_unpackhi_epi32:
1196 ; X64-NEXT: kmovw %edi, %k1
1197 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
1199 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
1200 %arg1 = bitcast i16 %a1 to <16 x i1>
1201 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
1202 %arg3 = bitcast <8 x i64> %a3 to <16 x i32>
1203 %res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1204 %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
1205 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Zero-masked i32 unpack-high: %a1 and %a2 (viewed as <16 x i32>) are
; interleaved; elements whose bit in the i16 mask %a0 is clear come from
; zeroinitializer. The checks expect a vpunpckhdq carrying {%k1} and {z}.
1209 define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
1210 ; X32-LABEL: test_mm512_maskz_unpackhi_epi32:
1212 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1213 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1216 ; X64-LABEL: test_mm512_maskz_unpackhi_epi32:
1218 ; X64-NEXT: kmovw %edi, %k1
1219 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1221 %arg0 = bitcast i16 %a0 to <16 x i1>
1222 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
1223 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
1224 %res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1225 %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
1226 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Unmasked i64 unpack-high: the odd-indexed elements of %a0 and %a1 are
; interleaved (a0[1],a1[1],a0[3],a1[3],...). The checks expect vunpckhpd here,
; whereas the masked variants of this test check for vpunpckhqdq.
1230 define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) {
1231 ; X32-LABEL: test_mm512_unpackhi_epi64:
1233 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1236 ; X64-LABEL: test_mm512_unpackhi_epi64:
1238 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1240 %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; Merge-masked i64 unpack-high: %a2 and %a3 are interleaved, then the i8 %a1
; (bitcast to <8 x i1>) selects between that result and the passthrough %a0.
; The checks expect the mask to be moved into %k1 followed by a vpunpckhqdq.
; NOTE(review): unlike the other masked tests in this file, the decoded
; shuffle comment checked here shows no {%k1} after zmm0 - presumably a gap in
; the assembly comment printer when the checks were generated; confirm on the
; next regeneration.
1244 define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
1245 ; X32-LABEL: test_mm512_mask_unpackhi_epi64:
1247 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1248 ; X32-NEXT: kmovw %eax, %k1
1249 ; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
1252 ; X64-LABEL: test_mm512_mask_unpackhi_epi64:
1254 ; X64-NEXT: kmovw %edi, %k1
1255 ; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
1257 %arg1 = bitcast i8 %a1 to <8 x i1>
1258 %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1259 %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
; Zero-masked i64 unpack-high: %a1 and %a2 are interleaved; elements whose bit
; in the i8 mask %a0 is clear come from zeroinitializer. The checks expect the
; mask to be moved into %k1 followed by a vpunpckhqdq.
; NOTE(review): unlike the other zero-masked tests in this file, the decoded
; shuffle comment checked here shows neither {%k1} nor {z} - presumably a gap
; in the assembly comment printer when the checks were generated; confirm on
; the next regeneration.
1263 define <8 x i64> @test_mm512_maskz_unpackhi_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
1264 ; X32-LABEL: test_mm512_maskz_unpackhi_epi64:
1266 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1267 ; X32-NEXT: kmovw %eax, %k1
1268 ; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1271 ; X64-LABEL: test_mm512_maskz_unpackhi_epi64:
1273 ; X64-NEXT: kmovw %edi, %k1
1274 ; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1276 %arg0 = bitcast i8 %a0 to <8 x i1>
1277 %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1278 %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
; Unmasked double unpack-high: the odd-indexed elements of %a0 and %a1 are
; interleaved (a0[1],a1[1],a0[3],a1[3],...). Both runs are expected to emit
; one vunpckhpd with that decoded pattern.
1282 define <8 x double> @test_mm512_unpackhi_pd(<8 x double> %a0, <8 x double> %a1) {
1283 ; X32-LABEL: test_mm512_unpackhi_pd:
1285 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1288 ; X64-LABEL: test_mm512_unpackhi_pd:
1290 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1292 %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1293 ret <8 x double> %res
; Merge-masked double unpack-high: %a2 and %a3 are interleaved, then the i8
; %a1 (bitcast to <8 x i1>) selects between that result and the passthrough
; %a0. The checks expect the mask in %k1 and a vunpckhpd writing zmm0 under
; {%k1}.
1296 define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
1297 ; X32-LABEL: test_mm512_mask_unpackhi_pd:
1299 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1300 ; X32-NEXT: kmovw %eax, %k1
1301 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
1304 ; X64-LABEL: test_mm512_mask_unpackhi_pd:
1306 ; X64-NEXT: kmovw %edi, %k1
1307 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
1309 %arg1 = bitcast i8 %a1 to <8 x i1>
1310 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1311 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
1312 ret <8 x double> %res1
; Zero-masked double unpack-high: %a1 and %a2 are interleaved; elements whose
; bit in the i8 mask %a0 is clear come from zeroinitializer. The checks expect
; a vunpckhpd carrying both {%k1} and {z}.
1315 define <8 x double> @test_mm512_maskz_unpackhi_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
1316 ; X32-LABEL: test_mm512_maskz_unpackhi_pd:
1318 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1319 ; X32-NEXT: kmovw %eax, %k1
1320 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1323 ; X64-LABEL: test_mm512_maskz_unpackhi_pd:
1325 ; X64-NEXT: kmovw %edi, %k1
1326 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
1328 %arg0 = bitcast i8 %a0 to <8 x i1>
1329 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1330 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
1331 ret <8 x double> %res1
; Unmasked float unpack-high: the upper two elements of every group of four
; in %a0 and %a1 are interleaved (a0[2],a1[2],a0[3],a1[3],...). Both runs are
; expected to emit one vunpckhps with that decoded pattern.
1334 define <16 x float> @test_mm512_unpackhi_ps(<16 x float> %a0, <16 x float> %a1) {
1335 ; X32-LABEL: test_mm512_unpackhi_ps:
1337 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1340 ; X64-LABEL: test_mm512_unpackhi_ps:
1342 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1344 %res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1345 ret <16 x float> %res
; Merge-masked float unpack-high: %a2 and %a3 are interleaved, then the i16
; %a1 (bitcast to <16 x i1>) selects between that result and the passthrough
; %a0. The checks expect the mask in %k1 and a vunpckhps writing zmm0 under
; {%k1}.
1348 define <16 x float> @test_mm512_mask_unpackhi_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
1349 ; X32-LABEL: test_mm512_mask_unpackhi_ps:
1351 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1352 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
1355 ; X64-LABEL: test_mm512_mask_unpackhi_ps:
1357 ; X64-NEXT: kmovw %edi, %k1
1358 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
1360 %arg1 = bitcast i16 %a1 to <16 x i1>
1361 %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1362 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
1363 ret <16 x float> %res1
; Zero-masked float unpack-high: %a1 and %a2 are interleaved; elements whose
; bit in the i16 mask %a0 is clear come from zeroinitializer. The checks
; expect a vunpckhps carrying both {%k1} and {z}.
1366 define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
1367 ; X32-LABEL: test_mm512_maskz_unpackhi_ps:
1369 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1370 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1373 ; X64-LABEL: test_mm512_maskz_unpackhi_ps:
1375 ; X64-NEXT: kmovw %edi, %k1
1376 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1378 %arg0 = bitcast i16 %a0 to <16 x i1>
1379 %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1380 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
1381 ret <16 x float> %res1
; Unmasked i32 unpack-low: both inputs are viewed as <16 x i32> and the lower
; two elements of every group of four are interleaved (a[0],b[0],a[1],b[1],
; ...), then viewed as <8 x i64> again. The checks expect vunpcklps here,
; whereas the masked variants of this test check for vpunpckldq.
1384 define <8 x i64> @test_mm512_unpacklo_epi32(<8 x i64> %a0, <8 x i64> %a1) {
1385 ; X32-LABEL: test_mm512_unpacklo_epi32:
1387 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1390 ; X64-LABEL: test_mm512_unpacklo_epi32:
1392 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1394 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
1395 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
1396 %res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
1397 %res1 = bitcast <16 x i32> %res0 to <8 x i64>
; Merge-masked i32 unpack-low: %a2 and %a3 (viewed as <16 x i32>) are
; interleaved, then the i16 %a1 (bitcast to <16 x i1>) selects per i32 element
; between that result and the passthrough %a0. The checks expect the mask in
; %k1 and a vpunpckldq writing zmm0 under {%k1}.
1401 define <8 x i64> @test_mm512_mask_unpacklo_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
1402 ; X32-LABEL: test_mm512_mask_unpacklo_epi32:
1404 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1405 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
1408 ; X64-LABEL: test_mm512_mask_unpacklo_epi32:
1410 ; X64-NEXT: kmovw %edi, %k1
1411 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
1413 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
1414 %arg1 = bitcast i16 %a1 to <16 x i1>
1415 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
1416 %arg3 = bitcast <8 x i64> %a3 to <16 x i32>
1417 %res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
1418 %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
1419 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Zero-masked i32 unpack-low: %a1 and %a2 (viewed as <16 x i32>) are
; interleaved; elements whose bit in the i16 mask %a0 is clear come from
; zeroinitializer. The checks expect a vpunpckldq carrying {%k1} and {z}.
1423 define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
1424 ; X32-LABEL: test_mm512_maskz_unpacklo_epi32:
1426 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1427 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1430 ; X64-LABEL: test_mm512_maskz_unpacklo_epi32:
1432 ; X64-NEXT: kmovw %edi, %k1
1433 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1435 %arg0 = bitcast i16 %a0 to <16 x i1>
1436 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
1437 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
1438 %res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
1439 %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
1440 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Unmasked i64 unpack-low: the even-indexed elements of %a0 and %a1 are
; interleaved (a0[0],a1[0],a0[2],a1[2],...). The checks expect vunpcklpd here,
; whereas the masked variants of this test check for vpunpcklqdq.
1444 define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) {
1445 ; X32-LABEL: test_mm512_unpacklo_epi64:
1447 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1450 ; X64-LABEL: test_mm512_unpacklo_epi64:
1452 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1454 %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Merge-masked i64 unpack-low: %a2 and %a3 are interleaved, then the i8 %a1
; (bitcast to <8 x i1>) selects between that result and the passthrough %a0.
; The checks expect the mask to be moved into %k1 followed by a vpunpcklqdq.
; NOTE(review): unlike the other masked tests in this file, the decoded
; shuffle comment checked here shows no {%k1} after zmm0 - presumably a gap in
; the assembly comment printer when the checks were generated; confirm on the
; next regeneration.
1458 define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
1459 ; X32-LABEL: test_mm512_mask_unpacklo_epi64:
1461 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1462 ; X32-NEXT: kmovw %eax, %k1
1463 ; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
1466 ; X64-LABEL: test_mm512_mask_unpacklo_epi64:
1468 ; X64-NEXT: kmovw %edi, %k1
1469 ; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
1471 %arg1 = bitcast i8 %a1 to <8 x i1>
1472 %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1473 %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
; Zero-masked i64 unpack-low: %a1 and %a2 are interleaved; elements whose bit
; in the i8 mask %a0 is clear come from zeroinitializer. The checks expect the
; mask to be moved into %k1 followed by a vpunpcklqdq.
; NOTE(review): unlike the other zero-masked tests in this file, the decoded
; shuffle comment checked here shows neither {%k1} nor {z} - presumably a gap
; in the assembly comment printer when the checks were generated; confirm on
; the next regeneration.
1477 define <8 x i64> @test_mm512_maskz_unpacklo_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
1478 ; X32-LABEL: test_mm512_maskz_unpacklo_epi64:
1480 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1481 ; X32-NEXT: kmovw %eax, %k1
1482 ; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1485 ; X64-LABEL: test_mm512_maskz_unpacklo_epi64:
1487 ; X64-NEXT: kmovw %edi, %k1
1488 ; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1490 %arg0 = bitcast i8 %a0 to <8 x i1>
1491 %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1492 %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
; Unmasked double unpack-low: the even-indexed elements of %a0 and %a1 are
; interleaved (a0[0],a1[0],a0[2],a1[2],...). Both runs are expected to emit
; one vunpcklpd with that decoded pattern.
1496 define <8 x double> @test_mm512_unpacklo_pd(<8 x double> %a0, <8 x double> %a1) {
1497 ; X32-LABEL: test_mm512_unpacklo_pd:
1499 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1502 ; X64-LABEL: test_mm512_unpacklo_pd:
1504 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1506 %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1507 ret <8 x double> %res
; Merge-masked double unpack-low: %a2 and %a3 are interleaved, then the i8 %a1
; (bitcast to <8 x i1>) selects between that result and the passthrough %a0.
; The checks expect the mask in %k1 and a vunpcklpd writing zmm0 under {%k1}.
1510 define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
1511 ; X32-LABEL: test_mm512_mask_unpacklo_pd:
1513 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1514 ; X32-NEXT: kmovw %eax, %k1
1515 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
1518 ; X64-LABEL: test_mm512_mask_unpacklo_pd:
1520 ; X64-NEXT: kmovw %edi, %k1
1521 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
1523 %arg1 = bitcast i8 %a1 to <8 x i1>
1524 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1525 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
1526 ret <8 x double> %res1
; Zero-masked double unpack-low: %a1 and %a2 are interleaved; elements whose
; bit in the i8 mask %a0 is clear come from zeroinitializer. The checks expect
; a vunpcklpd carrying both {%k1} and {z}.
1529 define <8 x double> @test_mm512_maskz_unpacklo_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
1530 ; X32-LABEL: test_mm512_maskz_unpacklo_pd:
1532 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1533 ; X32-NEXT: kmovw %eax, %k1
1534 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1537 ; X64-LABEL: test_mm512_maskz_unpacklo_pd:
1539 ; X64-NEXT: kmovw %edi, %k1
1540 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1542 %arg0 = bitcast i8 %a0 to <8 x i1>
1543 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1544 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
1545 ret <8 x double> %res1
; Unmasked float unpack-low: the lower two elements of every group of four in
; %a0 and %a1 are interleaved (a0[0],a1[0],a0[1],a1[1],...). Both runs are
; expected to emit one vunpcklps with that decoded pattern.
1548 define <16 x float> @test_mm512_unpacklo_ps(<16 x float> %a0, <16 x float> %a1) {
1549 ; X32-LABEL: test_mm512_unpacklo_ps:
1551 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1554 ; X64-LABEL: test_mm512_unpacklo_ps:
1556 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1558 %res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
1559 ret <16 x float> %res
; Merge-masked float unpack-low: %a2 and %a3 are interleaved, then the i16 %a1
; (bitcast to <16 x i1>) selects between that result and the passthrough %a0.
; The checks expect the mask in %k1 and a vunpcklps writing zmm0 under {%k1}.
1562 define <16 x float> @test_mm512_mask_unpacklo_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
1563 ; X32-LABEL: test_mm512_mask_unpacklo_ps:
1565 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1566 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
1569 ; X64-LABEL: test_mm512_mask_unpacklo_ps:
1571 ; X64-NEXT: kmovw %edi, %k1
1572 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
1574 %arg1 = bitcast i16 %a1 to <16 x i1>
1575 %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
1576 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
1577 ret <16 x float> %res1
; Zero-masked float unpack-low: %a1 and %a2 are interleaved; elements whose
; bit in the i16 mask %a0 is clear come from zeroinitializer. The checks
; expect a vunpcklps carrying both {%k1} and {z}.
1580 define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
1581 ; X32-LABEL: test_mm512_maskz_unpacklo_ps:
1583 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1584 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1587 ; X64-LABEL: test_mm512_maskz_unpacklo_ps:
1589 ; X64-NEXT: kmovw %edi, %k1
1590 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1592 %arg0 = bitcast i16 %a0 to <16 x i1>
1593 %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
1594 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
1595 ret <16 x float> %res1
; Zero-extends <2 x double> to <8 x double>: shuffle indices 0-1 pick %a0 and
; indices 2-3 pick the zeroinitializer operand, so elements 2-7 of the result
; are zero. The checks expect only a vmovaps %xmm0, %xmm0 (presumably relying
; on the xmm write clearing the upper part of zmm0 - confirm against the ISA
; reference).
1598 define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind {
1599 ; X32-LABEL: test_mm512_zextpd128_pd512:
1601 ; X32-NEXT: vmovaps %xmm0, %xmm0
1604 ; X64-LABEL: test_mm512_zextpd128_pd512:
1606 ; X64-NEXT: vmovaps %xmm0, %xmm0
1608 %res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
1609 ret <8 x double> %res
; Zero-extends <4 x double> to <8 x double>: shuffle indices 0-3 pick %a0 and
; indices 4-7 pick the zeroinitializer operand. The checks expect only a
; vmovaps %ymm0, %ymm0.
1612 define <8 x double> @test_mm512_zextpd256_pd512(<4 x double> %a0) nounwind {
1613 ; X32-LABEL: test_mm512_zextpd256_pd512:
1615 ; X32-NEXT: vmovaps %ymm0, %ymm0
1618 ; X64-LABEL: test_mm512_zextpd256_pd512:
1620 ; X64-NEXT: vmovaps %ymm0, %ymm0
1622 %res = shufflevector <4 x double> %a0, <4 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1623 ret <8 x double> %res
; Zero-extends <4 x float> to <16 x float>: shuffle indices 0-3 pick %a0 and
; indices 4-7 pick the zeroinitializer operand, so elements 4-15 of the result
; are zero.
; NOTE(review): the sibling zext tests check a single vmovaps, but this one
; additionally checks a vxorps plus a vinsertf64x4 of the zeroed register -
; presumably a missed simplification in the compiler revision that generated
; these checks; confirm on the next regeneration.
1626 define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind {
1627 ; X32-LABEL: test_mm512_zextps128_ps512:
1629 ; X32-NEXT: vmovaps %xmm0, %xmm0
1630 ; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
1631 ; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1634 ; X64-LABEL: test_mm512_zextps128_ps512:
1636 ; X64-NEXT: vmovaps %xmm0, %xmm0
1637 ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
1638 ; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1640 %res = shufflevector <4 x float> %a0, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
1641 ret <16 x float> %res
; Zero-extends <8 x float> to <16 x float>: shuffle indices 0-7 pick %a0 and
; indices 8-15 pick the zeroinitializer operand. The checks expect only a
; vmovaps %ymm0, %ymm0.
1644 define <16 x float> @test_mm512_zextps256_ps512(<8 x float> %a0) nounwind {
1645 ; X32-LABEL: test_mm512_zextps256_ps512:
1647 ; X32-NEXT: vmovaps %ymm0, %ymm0
1650 ; X64-LABEL: test_mm512_zextps256_ps512:
1652 ; X64-NEXT: vmovaps %ymm0, %ymm0
1654 %res = shufflevector <8 x float> %a0, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1655 ret <16 x float> %res
; Zero-extends <2 x i64> to <8 x i64>: shuffle indices 0-1 pick %a0 and
; indices 2-3 pick the zeroinitializer operand, so elements 2-7 of the result
; are zero. The checks expect only a vmovaps %xmm0, %xmm0.
1658 define <8 x i64> @test_mm512_zextsi128_si512(<2 x i64> %a0) nounwind {
1659 ; X32-LABEL: test_mm512_zextsi128_si512:
1661 ; X32-NEXT: vmovaps %xmm0, %xmm0
1664 ; X64-LABEL: test_mm512_zextsi128_si512:
1666 ; X64-NEXT: vmovaps %xmm0, %xmm0
1668 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
; Zero-extends <4 x i64> to <8 x i64>: shuffle indices 0-3 pick %a0 and
; indices 4-7 pick the zeroinitializer operand. The checks expect only a
; vmovaps %ymm0, %ymm0.
1672 define <8 x i64> @test_mm512_zextsi256_si512(<4 x i64> %a0) nounwind {
1673 ; X32-LABEL: test_mm512_zextsi256_si512:
1675 ; X32-NEXT: vmovaps %ymm0, %ymm0
1678 ; X64-LABEL: test_mm512_zextsi256_si512:
1680 ; X64-NEXT: vmovaps %ymm0, %ymm0
1682 %res = shufflevector <4 x i64> %a0, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>