; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi2-builtins.c

define <8 x i64> @test_mm512_mask_compress_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_compress_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define void @test_mm512_mask_compressstoreu_epi16(i8* %__P, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, i16* %1, <32 x i1> %2)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi8(i8* %__P, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rsi, %k1
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, i8* %__P, <64 x i1> %1)
  ret void
}

define <8 x i64> @test_mm512_mask_expand_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expand_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %0)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %0, <32 x i1> %1, <32 x i16> zeroinitializer)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shldi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shldi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shrdi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shrdi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shldv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)