; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi2-builtins.c
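
; Compress tests: vpcompressw/vpcompressb pack the elements selected by the
; mask into the low lanes of the destination; unselected destination lanes are
; merged from the passthru (or zeroed with {z}).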
define <8 x i64> @test_mm512_mask_compress_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}
define <8 x i64> @test_mm512_maskz_compress_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}
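; For the epi8 variants the mask is 64-bit; the i386 ABI passes it in two
; 32-bit stack slots, so the X86 code rebuilds the k-register with two kmovd
; loads and a kunpckdq.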
define <8 x i64> @test_mm512_mask_compress_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}
define <8 x i64> @test_mm512_maskz_compress_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}
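; Compress-store tests: llvm.masked.compressstore selects vpcompress with a
; memory destination; vzeroupper is emitted before the return since no vector
; value is live out of the function.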
define void @test_mm512_mask_compressstoreu_epi16(ptr %__P, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, ptr %__P, <32 x i1> %1)
  ret void
}
define void @test_mm512_mask_compressstoreu_epi8(ptr %__P, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rsi, %k1
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, ptr %__P, <64 x i1> %1)
  ret void
}
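; Expand tests: vpexpandw/vpexpandb are the inverse of compress, distributing
; the packed low source elements to the lanes selected by the mask.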
define <8 x i64> @test_mm512_mask_expand_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}
define <8 x i64> @test_mm512_maskz_expand_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}
define <8 x i64> @test_mm512_mask_expand_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}
define <8 x i64> @test_mm512_maskz_expand_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}
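; Expand-load tests: llvm.masked.expandload selects vpexpand with a memory
; source, loading only as many contiguous elements as the mask has set bits.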
define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(ptr %__P, <32 x i1> %1, <32 x i16> %0)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}
define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i32 %__U to <32 x i1>
  %1 = tail call <32 x i16> @llvm.masked.expandload.v32i16(ptr %__P, <32 x i1> %0, <32 x i16> zeroinitializer)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}
define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(ptr %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}
define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(ptr %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}
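; Funnel-shift-left-by-immediate tests: llvm.fshl with a splat constant amount
; selects vpshldq/vpshldd/vpshldw; the amount is reduced modulo the element
; width.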
define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}
define <8 x i64> @test_mm512_shldi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}
define <8 x i64> @test_mm512_mask_shldi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
define <8 x i64> @test_mm512_maskz_shldi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}
define <8 x i64> @test_mm512_shldi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}
define <8 x i64> @test_mm512_mask_shldi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
define <8 x i64> @test_mm512_maskz_shldi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}
define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}
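; Funnel-shift-right-by-immediate tests: llvm.fshr with a splat constant
; amount (note the swapped A/B operands relative to fshl) selects
; vpshrdq/vpshrdd/vpshrdw.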
define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}
define <8 x i64> @test_mm512_shrdi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}
define <8 x i64> @test_mm512_mask_shrdi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
define <8 x i64> @test_mm512_maskz_shrdi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}
define <8 x i64> @test_mm512_shrdi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}
define <8 x i64> @test_mm512_mask_shrdi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
define <8 x i64> @test_mm512_maskz_shrdi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}
define <8 x i64> @test_mm512_shrdi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}
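; Variable-amount double-shift-left tests: llvm.fshl with a vector amount
; selects vpshldvq/vpshldvd/vpshldvw; the destination register supplies the
; high halves of each concatenated pair.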
define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}
define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}
define <8 x i64> @test_mm512_shldv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  ret <8 x i64> %0
}
define <8 x i64> @test_mm512_mask_shldv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}
define <8 x i64> @test_mm512_maskz_shldv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}
define <8 x i64> @test_mm512_shldv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}
define <8 x i64> @test_mm512_mask_shldv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}
define <8 x i64> @test_mm512_maskz_shldv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}
define <8 x i64> @test_mm512_shldv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}
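; Variable-amount double-shift-right tests: llvm.fshr with a vector amount
; selects vpshrdvq/vpshrdvd/vpshrdvw; here the destination register supplies
; the low halves (the second fshr operand).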
define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}
define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}
define <8 x i64> @test_mm512_shrdv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  ret <8 x i64> %0
}
define <8 x i64> @test_mm512_mask_shrdv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}
define <8 x i64> @test_mm512_maskz_shrdv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}
define <8 x i64> @test_mm512_shrdv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}
define <8 x i64> @test_mm512_mask_shrdv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}
define <8 x i64> @test_mm512_maskz_shrdv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}
define <8 x i64> @test_mm512_shrdv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}
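; Intrinsic declarations for the compress/expand tests above.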
declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>)