1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi2-builtins.c
7 define <2 x i64> @test_mm_mask_compress_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
8 ; X86-LABEL: test_mm_mask_compress_epi16:
9 ; X86: # %bb.0: # %entry
10 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
11 ; X86-NEXT: kmovd %eax, %k1
12 ; X86-NEXT: vpcompressw %xmm1, %xmm0 {%k1}
15 ; X64-LABEL: test_mm_mask_compress_epi16:
16 ; X64: # %bb.0: # %entry
17 ; X64-NEXT: kmovd %edi, %k1
18 ; X64-NEXT: vpcompressw %xmm1, %xmm0 {%k1}
21 %0 = bitcast <2 x i64> %__D to <8 x i16>
22 %1 = bitcast <2 x i64> %__S to <8 x i16>
23 %2 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
24 %3 = bitcast <8 x i16> %2 to <2 x i64>
28 define <2 x i64> @test_mm_maskz_compress_epi16(i8 zeroext %__U, <2 x i64> %__D) {
29 ; X86-LABEL: test_mm_maskz_compress_epi16:
30 ; X86: # %bb.0: # %entry
31 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
32 ; X86-NEXT: kmovd %eax, %k1
33 ; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
36 ; X64-LABEL: test_mm_maskz_compress_epi16:
37 ; X64: # %bb.0: # %entry
38 ; X64-NEXT: kmovd %edi, %k1
39 ; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
42 %0 = bitcast <2 x i64> %__D to <8 x i16>
43 %1 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
44 %2 = bitcast <8 x i16> %1 to <2 x i64>
48 define <2 x i64> @test_mm_mask_compress_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
49 ; X86-LABEL: test_mm_mask_compress_epi8:
50 ; X86: # %bb.0: # %entry
51 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
52 ; X86-NEXT: vpcompressb %xmm1, %xmm0 {%k1}
55 ; X64-LABEL: test_mm_mask_compress_epi8:
56 ; X64: # %bb.0: # %entry
57 ; X64-NEXT: kmovd %edi, %k1
58 ; X64-NEXT: vpcompressb %xmm1, %xmm0 {%k1}
61 %0 = bitcast <2 x i64> %__D to <16 x i8>
62 %1 = bitcast <2 x i64> %__S to <16 x i8>
63 %2 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
64 %3 = bitcast <16 x i8> %2 to <2 x i64>
68 define <2 x i64> @test_mm_maskz_compress_epi8(i16 zeroext %__U, <2 x i64> %__D) {
69 ; X86-LABEL: test_mm_maskz_compress_epi8:
70 ; X86: # %bb.0: # %entry
71 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
72 ; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z}
75 ; X64-LABEL: test_mm_maskz_compress_epi8:
76 ; X64: # %bb.0: # %entry
77 ; X64-NEXT: kmovd %edi, %k1
78 ; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z}
81 %0 = bitcast <2 x i64> %__D to <16 x i8>
82 %1 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
83 %2 = bitcast <16 x i8> %1 to <2 x i64>
87 define void @test_mm_mask_compressstoreu_epi16(i8* %__P, i8 zeroext %__U, <2 x i64> %__D) {
88 ; X86-LABEL: test_mm_mask_compressstoreu_epi16:
89 ; X86: # %bb.0: # %entry
90 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
91 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
92 ; X86-NEXT: kmovd %eax, %k1
93 ; X86-NEXT: vpcompressw %xmm0, (%ecx) {%k1}
96 ; X64-LABEL: test_mm_mask_compressstoreu_epi16:
97 ; X64: # %bb.0: # %entry
98 ; X64-NEXT: kmovd %esi, %k1
99 ; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1}
102 %0 = bitcast <2 x i64> %__D to <8 x i16>
103 %1 = bitcast i8* %__P to i16*
104 %2 = bitcast i8 %__U to <8 x i1>
105 tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %0, i16* %1, <8 x i1> %2)
109 define void @test_mm_mask_compressstoreu_epi8(i8* %__P, i16 zeroext %__U, <2 x i64> %__D) {
110 ; X86-LABEL: test_mm_mask_compressstoreu_epi8:
111 ; X86: # %bb.0: # %entry
112 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
113 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
114 ; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1}
117 ; X64-LABEL: test_mm_mask_compressstoreu_epi8:
118 ; X64: # %bb.0: # %entry
119 ; X64-NEXT: kmovd %esi, %k1
120 ; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1}
123 %0 = bitcast <2 x i64> %__D to <16 x i8>
124 %1 = bitcast i16 %__U to <16 x i1>
125 tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %0, i8* %__P, <16 x i1> %1)
129 define <2 x i64> @test_mm_mask_expand_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
130 ; X86-LABEL: test_mm_mask_expand_epi16:
131 ; X86: # %bb.0: # %entry
132 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
133 ; X86-NEXT: kmovd %eax, %k1
134 ; X86-NEXT: vpexpandw %xmm1, %xmm0 {%k1}
137 ; X64-LABEL: test_mm_mask_expand_epi16:
138 ; X64: # %bb.0: # %entry
139 ; X64-NEXT: kmovd %edi, %k1
140 ; X64-NEXT: vpexpandw %xmm1, %xmm0 {%k1}
143 %0 = bitcast <2 x i64> %__D to <8 x i16>
144 %1 = bitcast <2 x i64> %__S to <8 x i16>
145 %2 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
146 %3 = bitcast <8 x i16> %2 to <2 x i64>
150 define <2 x i64> @test_mm_maskz_expand_epi16(i8 zeroext %__U, <2 x i64> %__D) {
151 ; X86-LABEL: test_mm_maskz_expand_epi16:
152 ; X86: # %bb.0: # %entry
153 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
154 ; X86-NEXT: kmovd %eax, %k1
155 ; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
158 ; X64-LABEL: test_mm_maskz_expand_epi16:
159 ; X64: # %bb.0: # %entry
160 ; X64-NEXT: kmovd %edi, %k1
161 ; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
164 %0 = bitcast <2 x i64> %__D to <8 x i16>
165 %1 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
166 %2 = bitcast <8 x i16> %1 to <2 x i64>
170 define <2 x i64> @test_mm_mask_expand_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
171 ; X86-LABEL: test_mm_mask_expand_epi8:
172 ; X86: # %bb.0: # %entry
173 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
174 ; X86-NEXT: vpexpandb %xmm1, %xmm0 {%k1}
177 ; X64-LABEL: test_mm_mask_expand_epi8:
178 ; X64: # %bb.0: # %entry
179 ; X64-NEXT: kmovd %edi, %k1
180 ; X64-NEXT: vpexpandb %xmm1, %xmm0 {%k1}
183 %0 = bitcast <2 x i64> %__D to <16 x i8>
184 %1 = bitcast <2 x i64> %__S to <16 x i8>
185 %2 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
186 %3 = bitcast <16 x i8> %2 to <2 x i64>
190 define <2 x i64> @test_mm_maskz_expand_epi8(i16 zeroext %__U, <2 x i64> %__D) {
191 ; X86-LABEL: test_mm_maskz_expand_epi8:
192 ; X86: # %bb.0: # %entry
193 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
194 ; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
197 ; X64-LABEL: test_mm_maskz_expand_epi8:
198 ; X64: # %bb.0: # %entry
199 ; X64-NEXT: kmovd %edi, %k1
200 ; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
203 %0 = bitcast <2 x i64> %__D to <16 x i8>
204 %1 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
205 %2 = bitcast <16 x i8> %1 to <2 x i64>
209 define <2 x i64> @test_mm_mask_expandloadu_epi16(<2 x i64> %__S, i8 zeroext %__U, i8* readonly %__P) {
210 ; X86-LABEL: test_mm_mask_expandloadu_epi16:
211 ; X86: # %bb.0: # %entry
212 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
213 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
214 ; X86-NEXT: kmovd %ecx, %k1
215 ; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1}
218 ; X64-LABEL: test_mm_mask_expandloadu_epi16:
219 ; X64: # %bb.0: # %entry
220 ; X64-NEXT: kmovd %edi, %k1
221 ; X64-NEXT: vpexpandw (%rsi), %xmm0 {%k1}
224 %0 = bitcast <2 x i64> %__S to <8 x i16>
225 %1 = bitcast i8* %__P to i16*
226 %2 = bitcast i8 %__U to <8 x i1>
227 %3 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> %0)
228 %4 = bitcast <8 x i16> %3 to <2 x i64>
232 define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, i8* readonly %__P) {
233 ; X86-LABEL: test_mm_maskz_expandloadu_epi16:
234 ; X86: # %bb.0: # %entry
235 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
236 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
237 ; X86-NEXT: kmovd %ecx, %k1
238 ; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z}
241 ; X64-LABEL: test_mm_maskz_expandloadu_epi16:
242 ; X64: # %bb.0: # %entry
243 ; X64-NEXT: kmovd %edi, %k1
244 ; X64-NEXT: vpexpandw (%rsi), %xmm0 {%k1} {z}
247 %0 = bitcast i8* %__P to i16*
248 %1 = bitcast i8 %__U to <8 x i1>
249 %2 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %0, <8 x i1> %1, <8 x i16> zeroinitializer)
250 %3 = bitcast <8 x i16> %2 to <2 x i64>
254 define <2 x i64> @test_mm_mask_expandloadu_epi8(<2 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) {
255 ; X86-LABEL: test_mm_mask_expandloadu_epi8:
256 ; X86: # %bb.0: # %entry
257 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
258 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
259 ; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1}
262 ; X64-LABEL: test_mm_mask_expandloadu_epi8:
263 ; X64: # %bb.0: # %entry
264 ; X64-NEXT: kmovd %edi, %k1
265 ; X64-NEXT: vpexpandb (%rsi), %xmm0 {%k1}
268 %0 = bitcast <2 x i64> %__S to <16 x i8>
269 %1 = bitcast i16 %__U to <16 x i1>
270 %2 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %1, <16 x i8> %0)
271 %3 = bitcast <16 x i8> %2 to <2 x i64>
275 define <2 x i64> @test_mm_maskz_expandloadu_epi8(i16 zeroext %__U, i8* readonly %__P) {
276 ; X86-LABEL: test_mm_maskz_expandloadu_epi8:
277 ; X86: # %bb.0: # %entry
278 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
279 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
280 ; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z}
283 ; X64-LABEL: test_mm_maskz_expandloadu_epi8:
284 ; X64: # %bb.0: # %entry
285 ; X64-NEXT: kmovd %edi, %k1
286 ; X64-NEXT: vpexpandb (%rsi), %xmm0 {%k1} {z}
289 %0 = bitcast i16 %__U to <16 x i1>
290 %1 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %0, <16 x i8> zeroinitializer)
291 %2 = bitcast <16 x i8> %1 to <2 x i64>
295 define <4 x i64> @test_mm256_mask_compress_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
296 ; X86-LABEL: test_mm256_mask_compress_epi16:
297 ; X86: # %bb.0: # %entry
298 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
299 ; X86-NEXT: vpcompressw %ymm1, %ymm0 {%k1}
302 ; X64-LABEL: test_mm256_mask_compress_epi16:
303 ; X64: # %bb.0: # %entry
304 ; X64-NEXT: kmovd %edi, %k1
305 ; X64-NEXT: vpcompressw %ymm1, %ymm0 {%k1}
308 %0 = bitcast <4 x i64> %__D to <16 x i16>
309 %1 = bitcast <4 x i64> %__S to <16 x i16>
310 %2 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
311 %3 = bitcast <16 x i16> %2 to <4 x i64>
315 define <4 x i64> @test_mm256_maskz_compress_epi16(i16 zeroext %__U, <4 x i64> %__D) {
316 ; X86-LABEL: test_mm256_maskz_compress_epi16:
317 ; X86: # %bb.0: # %entry
318 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
319 ; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z}
322 ; X64-LABEL: test_mm256_maskz_compress_epi16:
323 ; X64: # %bb.0: # %entry
324 ; X64-NEXT: kmovd %edi, %k1
325 ; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z}
328 %0 = bitcast <4 x i64> %__D to <16 x i16>
329 %1 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
330 %2 = bitcast <16 x i16> %1 to <4 x i64>
334 define <4 x i64> @test_mm256_mask_compress_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
335 ; X86-LABEL: test_mm256_mask_compress_epi8:
336 ; X86: # %bb.0: # %entry
337 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
338 ; X86-NEXT: vpcompressb %ymm1, %ymm0 {%k1}
341 ; X64-LABEL: test_mm256_mask_compress_epi8:
342 ; X64: # %bb.0: # %entry
343 ; X64-NEXT: kmovd %edi, %k1
344 ; X64-NEXT: vpcompressb %ymm1, %ymm0 {%k1}
347 %0 = bitcast <4 x i64> %__D to <32 x i8>
348 %1 = bitcast <4 x i64> %__S to <32 x i8>
349 %2 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
350 %3 = bitcast <32 x i8> %2 to <4 x i64>
354 define <4 x i64> @test_mm256_maskz_compress_epi8(i32 %__U, <4 x i64> %__D) {
355 ; X86-LABEL: test_mm256_maskz_compress_epi8:
356 ; X86: # %bb.0: # %entry
357 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
358 ; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z}
361 ; X64-LABEL: test_mm256_maskz_compress_epi8:
362 ; X64: # %bb.0: # %entry
363 ; X64-NEXT: kmovd %edi, %k1
364 ; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z}
367 %0 = bitcast <4 x i64> %__D to <32 x i8>
368 %1 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
369 %2 = bitcast <32 x i8> %1 to <4 x i64>
373 define void @test_mm256_mask_compressstoreu_epi16(i8* %__P, i16 zeroext %__U, <4 x i64> %__D) {
374 ; X86-LABEL: test_mm256_mask_compressstoreu_epi16:
375 ; X86: # %bb.0: # %entry
376 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
378 ; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1}
379 ; X86-NEXT: vzeroupper
382 ; X64-LABEL: test_mm256_mask_compressstoreu_epi16:
383 ; X64: # %bb.0: # %entry
384 ; X64-NEXT: kmovd %esi, %k1
385 ; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1}
386 ; X64-NEXT: vzeroupper
389 %0 = bitcast <4 x i64> %__D to <16 x i16>
390 %1 = bitcast i8* %__P to i16*
391 %2 = bitcast i16 %__U to <16 x i1>
392 tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %0, i16* %1, <16 x i1> %2)
396 define void @test_mm256_mask_compressstoreu_epi8(i8* %__P, i32 %__U, <4 x i64> %__D) {
397 ; X86-LABEL: test_mm256_mask_compressstoreu_epi8:
398 ; X86: # %bb.0: # %entry
399 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
400 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
401 ; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1}
402 ; X86-NEXT: vzeroupper
405 ; X64-LABEL: test_mm256_mask_compressstoreu_epi8:
406 ; X64: # %bb.0: # %entry
407 ; X64-NEXT: kmovd %esi, %k1
408 ; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1}
409 ; X64-NEXT: vzeroupper
412 %0 = bitcast <4 x i64> %__D to <32 x i8>
413 %1 = bitcast i32 %__U to <32 x i1>
414 tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %0, i8* %__P, <32 x i1> %1)
418 define <4 x i64> @test_mm256_mask_expand_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
419 ; X86-LABEL: test_mm256_mask_expand_epi16:
420 ; X86: # %bb.0: # %entry
421 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
422 ; X86-NEXT: vpexpandw %ymm1, %ymm0 {%k1}
425 ; X64-LABEL: test_mm256_mask_expand_epi16:
426 ; X64: # %bb.0: # %entry
427 ; X64-NEXT: kmovd %edi, %k1
428 ; X64-NEXT: vpexpandw %ymm1, %ymm0 {%k1}
431 %0 = bitcast <4 x i64> %__D to <16 x i16>
432 %1 = bitcast <4 x i64> %__S to <16 x i16>
433 %2 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
434 %3 = bitcast <16 x i16> %2 to <4 x i64>
438 define <4 x i64> @test_mm256_maskz_expand_epi16(i16 zeroext %__U, <4 x i64> %__D) {
439 ; X86-LABEL: test_mm256_maskz_expand_epi16:
440 ; X86: # %bb.0: # %entry
441 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
442 ; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z}
445 ; X64-LABEL: test_mm256_maskz_expand_epi16:
446 ; X64: # %bb.0: # %entry
447 ; X64-NEXT: kmovd %edi, %k1
448 ; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z}
451 %0 = bitcast <4 x i64> %__D to <16 x i16>
452 %1 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
453 %2 = bitcast <16 x i16> %1 to <4 x i64>
457 define <4 x i64> @test_mm256_mask_expand_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
458 ; X86-LABEL: test_mm256_mask_expand_epi8:
459 ; X86: # %bb.0: # %entry
460 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
461 ; X86-NEXT: vpexpandb %ymm1, %ymm0 {%k1}
464 ; X64-LABEL: test_mm256_mask_expand_epi8:
465 ; X64: # %bb.0: # %entry
466 ; X64-NEXT: kmovd %edi, %k1
467 ; X64-NEXT: vpexpandb %ymm1, %ymm0 {%k1}
470 %0 = bitcast <4 x i64> %__D to <32 x i8>
471 %1 = bitcast <4 x i64> %__S to <32 x i8>
472 %2 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
473 %3 = bitcast <32 x i8> %2 to <4 x i64>
477 define <4 x i64> @test_mm256_maskz_expand_epi8(i32 %__U, <4 x i64> %__D) {
478 ; X86-LABEL: test_mm256_maskz_expand_epi8:
479 ; X86: # %bb.0: # %entry
480 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
481 ; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z}
484 ; X64-LABEL: test_mm256_maskz_expand_epi8:
485 ; X64: # %bb.0: # %entry
486 ; X64-NEXT: kmovd %edi, %k1
487 ; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z}
490 %0 = bitcast <4 x i64> %__D to <32 x i8>
491 %1 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
492 %2 = bitcast <32 x i8> %1 to <4 x i64>
496 define <4 x i64> @test_mm256_mask_expandloadu_epi16(<4 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) {
497 ; X86-LABEL: test_mm256_mask_expandloadu_epi16:
498 ; X86: # %bb.0: # %entry
499 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
500 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
501 ; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1}
504 ; X64-LABEL: test_mm256_mask_expandloadu_epi16:
505 ; X64: # %bb.0: # %entry
506 ; X64-NEXT: kmovd %edi, %k1
507 ; X64-NEXT: vpexpandw (%rsi), %ymm0 {%k1}
510 %0 = bitcast <4 x i64> %__S to <16 x i16>
511 %1 = bitcast i8* %__P to i16*
512 %2 = bitcast i16 %__U to <16 x i1>
513 %3 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> %0)
514 %4 = bitcast <16 x i16> %3 to <4 x i64>
518 define <4 x i64> @test_mm256_maskz_expandloadu_epi16(i16 zeroext %__U, i8* readonly %__P) {
519 ; X86-LABEL: test_mm256_maskz_expandloadu_epi16:
520 ; X86: # %bb.0: # %entry
521 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
522 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
523 ; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z}
526 ; X64-LABEL: test_mm256_maskz_expandloadu_epi16:
527 ; X64: # %bb.0: # %entry
528 ; X64-NEXT: kmovd %edi, %k1
529 ; X64-NEXT: vpexpandw (%rsi), %ymm0 {%k1} {z}
532 %0 = bitcast i8* %__P to i16*
533 %1 = bitcast i16 %__U to <16 x i1>
534 %2 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %0, <16 x i1> %1, <16 x i16> zeroinitializer)
535 %3 = bitcast <16 x i16> %2 to <4 x i64>
539 define <4 x i64> @test_mm256_mask_expandloadu_epi8(<4 x i64> %__S, i32 %__U, i8* readonly %__P) {
540 ; X86-LABEL: test_mm256_mask_expandloadu_epi8:
541 ; X86: # %bb.0: # %entry
542 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
543 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
544 ; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1}
547 ; X64-LABEL: test_mm256_mask_expandloadu_epi8:
548 ; X64: # %bb.0: # %entry
549 ; X64-NEXT: kmovd %edi, %k1
550 ; X64-NEXT: vpexpandb (%rsi), %ymm0 {%k1}
553 %0 = bitcast <4 x i64> %__S to <32 x i8>
554 %1 = bitcast i32 %__U to <32 x i1>
555 %2 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %1, <32 x i8> %0)
556 %3 = bitcast <32 x i8> %2 to <4 x i64>
560 define <4 x i64> @test_mm256_maskz_expandloadu_epi8(i32 %__U, i8* readonly %__P) {
561 ; X86-LABEL: test_mm256_maskz_expandloadu_epi8:
562 ; X86: # %bb.0: # %entry
563 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
564 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
565 ; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z}
568 ; X64-LABEL: test_mm256_maskz_expandloadu_epi8:
569 ; X64: # %bb.0: # %entry
570 ; X64-NEXT: kmovd %edi, %k1
571 ; X64-NEXT: vpexpandb (%rsi), %ymm0 {%k1} {z}
574 %0 = bitcast i32 %__U to <32 x i1>
575 %1 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %0, <32 x i8> zeroinitializer)
576 %2 = bitcast <32 x i8> %1 to <4 x i64>
580 define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
581 ; X86-LABEL: test_mm256_mask_shldi_epi64:
582 ; X86: # %bb.0: # %entry
583 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
584 ; X86-NEXT: kmovd %eax, %k1
585 ; X86-NEXT: vpshldq $127, %ymm2, %ymm1, %ymm0 {%k1}
588 ; X64-LABEL: test_mm256_mask_shldi_epi64:
589 ; X64: # %bb.0: # %entry
590 ; X64-NEXT: kmovd %edi, %k1
591 ; X64-NEXT: vpshldq $127, %ymm2, %ymm1, %ymm0 {%k1}
594 %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 127)
595 %1 = bitcast i8 %__U to <8 x i1>
596 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
597 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
601 declare <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64>, <4 x i64>, i32)
603 define <4 x i64> @test_mm256_maskz_shldi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
604 ; X86-LABEL: test_mm256_maskz_shldi_epi64:
605 ; X86: # %bb.0: # %entry
606 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
607 ; X86-NEXT: kmovd %eax, %k1
608 ; X86-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
611 ; X64-LABEL: test_mm256_maskz_shldi_epi64:
612 ; X64: # %bb.0: # %entry
613 ; X64-NEXT: kmovd %edi, %k1
614 ; X64-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
617 %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 63)
618 %1 = bitcast i8 %__U to <8 x i1>
619 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
620 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
624 define <4 x i64> @test_mm256_shldi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
625 ; CHECK-LABEL: test_mm256_shldi_epi64:
626 ; CHECK: # %bb.0: # %entry
627 ; CHECK-NEXT: vpshldq $31, %ymm1, %ymm0, %ymm0
628 ; CHECK-NEXT: ret{{[l|q]}}
630 %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 31)
634 define <2 x i64> @test_mm_mask_shldi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
635 ; X86-LABEL: test_mm_mask_shldi_epi64:
636 ; X86: # %bb.0: # %entry
637 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
638 ; X86-NEXT: kmovd %eax, %k1
639 ; X86-NEXT: vpshldq $127, %xmm2, %xmm1, %xmm0 {%k1}
642 ; X64-LABEL: test_mm_mask_shldi_epi64:
643 ; X64: # %bb.0: # %entry
644 ; X64-NEXT: kmovd %edi, %k1
645 ; X64-NEXT: vpshldq $127, %xmm2, %xmm1, %xmm0 {%k1}
648 %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 127)
649 %1 = bitcast i8 %__U to <8 x i1>
650 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
651 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
655 declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32) #3
657 define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
658 ; X86-LABEL: test_mm_maskz_shldi_epi64:
659 ; X86: # %bb.0: # %entry
660 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
661 ; X86-NEXT: kmovd %eax, %k1
662 ; X86-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
665 ; X64-LABEL: test_mm_maskz_shldi_epi64:
666 ; X64: # %bb.0: # %entry
667 ; X64-NEXT: kmovd %edi, %k1
668 ; X64-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
671 %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 63)
672 %1 = bitcast i8 %__U to <8 x i1>
673 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
674 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
678 define <2 x i64> @test_mm_shldi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
679 ; CHECK-LABEL: test_mm_shldi_epi64:
680 ; CHECK: # %bb.0: # %entry
681 ; CHECK-NEXT: vpshldq $31, %xmm1, %xmm0, %xmm0
682 ; CHECK-NEXT: ret{{[l|q]}}
684 %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 31)
688 define <4 x i64> @test_mm256_mask_shldi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
689 ; X86-LABEL: test_mm256_mask_shldi_epi32:
690 ; X86: # %bb.0: # %entry
691 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
692 ; X86-NEXT: kmovd %eax, %k1
693 ; X86-NEXT: vpshldd $127, %ymm2, %ymm1, %ymm0 {%k1}
696 ; X64-LABEL: test_mm256_mask_shldi_epi32:
697 ; X64: # %bb.0: # %entry
698 ; X64-NEXT: kmovd %edi, %k1
699 ; X64-NEXT: vpshldd $127, %ymm2, %ymm1, %ymm0 {%k1}
702 %0 = bitcast <4 x i64> %__A to <8 x i32>
703 %1 = bitcast <4 x i64> %__B to <8 x i32>
704 %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 127)
705 %3 = bitcast <4 x i64> %__S to <8 x i32>
706 %4 = bitcast i8 %__U to <8 x i1>
707 %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
708 %6 = bitcast <8 x i32> %5 to <4 x i64>
712 declare <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32>, <8 x i32>, i32)
714 define <4 x i64> @test_mm256_maskz_shldi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
715 ; X86-LABEL: test_mm256_maskz_shldi_epi32:
716 ; X86: # %bb.0: # %entry
717 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
718 ; X86-NEXT: kmovd %eax, %k1
719 ; X86-NEXT: vpshldd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
722 ; X64-LABEL: test_mm256_maskz_shldi_epi32:
723 ; X64: # %bb.0: # %entry
724 ; X64-NEXT: kmovd %edi, %k1
725 ; X64-NEXT: vpshldd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
728 %0 = bitcast <4 x i64> %__A to <8 x i32>
729 %1 = bitcast <4 x i64> %__B to <8 x i32>
730 %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 63)
731 %3 = bitcast i8 %__U to <8 x i1>
732 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
733 %5 = bitcast <8 x i32> %4 to <4 x i64>
737 define <4 x i64> @test_mm256_shldi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
738 ; CHECK-LABEL: test_mm256_shldi_epi32:
739 ; CHECK: # %bb.0: # %entry
740 ; CHECK-NEXT: vpshldd $31, %ymm1, %ymm0, %ymm0
741 ; CHECK-NEXT: ret{{[l|q]}}
743 %0 = bitcast <4 x i64> %__A to <8 x i32>
744 %1 = bitcast <4 x i64> %__B to <8 x i32>
745 %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 31)
746 %3 = bitcast <8 x i32> %2 to <4 x i64>
750 define <2 x i64> @test_mm_mask_shldi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
751 ; X86-LABEL: test_mm_mask_shldi_epi32:
752 ; X86: # %bb.0: # %entry
753 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
754 ; X86-NEXT: kmovd %eax, %k1
755 ; X86-NEXT: vpshldd $127, %xmm2, %xmm1, %xmm0 {%k1}
758 ; X64-LABEL: test_mm_mask_shldi_epi32:
759 ; X64: # %bb.0: # %entry
760 ; X64-NEXT: kmovd %edi, %k1
761 ; X64-NEXT: vpshldd $127, %xmm2, %xmm1, %xmm0 {%k1}
764 %0 = bitcast <2 x i64> %__A to <4 x i32>
765 %1 = bitcast <2 x i64> %__B to <4 x i32>
766 %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 127)
767 %3 = bitcast <2 x i64> %__S to <4 x i32>
768 %4 = bitcast i8 %__U to <8 x i1>
769 %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
770 %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
771 %6 = bitcast <4 x i32> %5 to <2 x i64>
775 declare <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32>, <4 x i32>, i32)
777 define <2 x i64> @test_mm_maskz_shldi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
778 ; X86-LABEL: test_mm_maskz_shldi_epi32:
779 ; X86: # %bb.0: # %entry
780 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
781 ; X86-NEXT: kmovd %eax, %k1
782 ; X86-NEXT: vpshldd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
785 ; X64-LABEL: test_mm_maskz_shldi_epi32:
786 ; X64: # %bb.0: # %entry
787 ; X64-NEXT: kmovd %edi, %k1
788 ; X64-NEXT: vpshldd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
791 %0 = bitcast <2 x i64> %__A to <4 x i32>
792 %1 = bitcast <2 x i64> %__B to <4 x i32>
793 %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 63)
794 %3 = bitcast i8 %__U to <8 x i1>
795 %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
796 %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
797 %5 = bitcast <4 x i32> %4 to <2 x i64>
801 define <2 x i64> @test_mm_shldi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
802 ; CHECK-LABEL: test_mm_shldi_epi32:
803 ; CHECK: # %bb.0: # %entry
804 ; CHECK-NEXT: vpshldd $31, %xmm1, %xmm0, %xmm0
805 ; CHECK-NEXT: ret{{[l|q]}}
807 %0 = bitcast <2 x i64> %__A to <4 x i32>
808 %1 = bitcast <2 x i64> %__B to <4 x i32>
809 %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 31)
810 %3 = bitcast <4 x i32> %2 to <2 x i64>
814 define <4 x i64> @test_mm256_mask_shldi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
815 ; X86-LABEL: test_mm256_mask_shldi_epi16:
816 ; X86: # %bb.0: # %entry
817 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
818 ; X86-NEXT: vpshldw $127, %ymm2, %ymm1, %ymm0 {%k1}
821 ; X64-LABEL: test_mm256_mask_shldi_epi16:
822 ; X64: # %bb.0: # %entry
823 ; X64-NEXT: kmovd %edi, %k1
824 ; X64-NEXT: vpshldw $127, %ymm2, %ymm1, %ymm0 {%k1}
827 %0 = bitcast <4 x i64> %__A to <16 x i16>
828 %1 = bitcast <4 x i64> %__B to <16 x i16>
829 %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 127)
830 %3 = bitcast <4 x i64> %__S to <16 x i16>
831 %4 = bitcast i16 %__U to <16 x i1>
832 %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
833 %6 = bitcast <16 x i16> %5 to <4 x i64>
837 declare <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16>, <16 x i16>, i32)
839 define <4 x i64> @test_mm256_maskz_shldi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
840 ; X86-LABEL: test_mm256_maskz_shldi_epi16:
841 ; X86: # %bb.0: # %entry
842 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
843 ; X86-NEXT: vpshldw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
846 ; X64-LABEL: test_mm256_maskz_shldi_epi16:
847 ; X64: # %bb.0: # %entry
848 ; X64-NEXT: kmovd %edi, %k1
849 ; X64-NEXT: vpshldw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
852 %0 = bitcast <4 x i64> %__A to <16 x i16>
853 %1 = bitcast <4 x i64> %__B to <16 x i16>
854 %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 63)
855 %3 = bitcast i16 %__U to <16 x i1>
856 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
857 %5 = bitcast <16 x i16> %4 to <4 x i64>
861 define <4 x i64> @test_mm256_shldi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
862 ; CHECK-LABEL: test_mm256_shldi_epi16:
863 ; CHECK: # %bb.0: # %entry
864 ; CHECK-NEXT: vpshldw $31, %ymm1, %ymm0, %ymm0
865 ; CHECK-NEXT: ret{{[l|q]}}
867 %0 = bitcast <4 x i64> %__A to <16 x i16>
868 %1 = bitcast <4 x i64> %__B to <16 x i16>
869 %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 31)
870 %3 = bitcast <16 x i16> %2 to <4 x i64>
874 define <2 x i64> @test_mm_mask_shldi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
875 ; X86-LABEL: test_mm_mask_shldi_epi16:
876 ; X86: # %bb.0: # %entry
877 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
878 ; X86-NEXT: kmovd %eax, %k1
879 ; X86-NEXT: vpshldw $127, %xmm2, %xmm1, %xmm0 {%k1}
882 ; X64-LABEL: test_mm_mask_shldi_epi16:
883 ; X64: # %bb.0: # %entry
884 ; X64-NEXT: kmovd %edi, %k1
885 ; X64-NEXT: vpshldw $127, %xmm2, %xmm1, %xmm0 {%k1}
888 %0 = bitcast <2 x i64> %__A to <8 x i16>
889 %1 = bitcast <2 x i64> %__B to <8 x i16>
890 %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 127)
891 %3 = bitcast <2 x i64> %__S to <8 x i16>
892 %4 = bitcast i8 %__U to <8 x i1>
893 %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
894 %6 = bitcast <8 x i16> %5 to <2 x i64>
898 declare <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16>, <8 x i16>, i32)
900 define <2 x i64> @test_mm_maskz_shldi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
901 ; X86-LABEL: test_mm_maskz_shldi_epi16:
902 ; X86: # %bb.0: # %entry
903 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
904 ; X86-NEXT: kmovd %eax, %k1
905 ; X86-NEXT: vpshldw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
908 ; X64-LABEL: test_mm_maskz_shldi_epi16:
909 ; X64: # %bb.0: # %entry
910 ; X64-NEXT: kmovd %edi, %k1
911 ; X64-NEXT: vpshldw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
914 %0 = bitcast <2 x i64> %__A to <8 x i16>
915 %1 = bitcast <2 x i64> %__B to <8 x i16>
916 %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 63)
917 %3 = bitcast i8 %__U to <8 x i1>
918 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
919 %5 = bitcast <8 x i16> %4 to <2 x i64>
923 define <2 x i64> @test_mm_shldi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
924 ; CHECK-LABEL: test_mm_shldi_epi16:
925 ; CHECK: # %bb.0: # %entry
926 ; CHECK-NEXT: vpshldw $31, %xmm1, %xmm0, %xmm0
927 ; CHECK-NEXT: ret{{[l|q]}}
929 %0 = bitcast <2 x i64> %__A to <8 x i16>
930 %1 = bitcast <2 x i64> %__B to <8 x i16>
931 %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 31)
932 %3 = bitcast <8 x i16> %2 to <2 x i64>
936 define <4 x i64> @test_mm256_mask_shrdi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
937 ; X86-LABEL: test_mm256_mask_shrdi_epi64:
938 ; X86: # %bb.0: # %entry
939 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
940 ; X86-NEXT: kmovd %eax, %k1
941 ; X86-NEXT: vpshrdq $127, %ymm2, %ymm1, %ymm0 {%k1}
944 ; X64-LABEL: test_mm256_mask_shrdi_epi64:
945 ; X64: # %bb.0: # %entry
946 ; X64-NEXT: kmovd %edi, %k1
947 ; X64-NEXT: vpshrdq $127, %ymm2, %ymm1, %ymm0 {%k1}
950 %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 127)
951 %1 = bitcast i8 %__U to <8 x i1>
952 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
953 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
957 declare <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64>, <4 x i64>, i32)
959 define <4 x i64> @test_mm256_maskz_shrdi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
960 ; X86-LABEL: test_mm256_maskz_shrdi_epi64:
961 ; X86: # %bb.0: # %entry
962 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
963 ; X86-NEXT: kmovd %eax, %k1
964 ; X86-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
967 ; X64-LABEL: test_mm256_maskz_shrdi_epi64:
968 ; X64: # %bb.0: # %entry
969 ; X64-NEXT: kmovd %edi, %k1
970 ; X64-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
973 %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 63)
974 %1 = bitcast i8 %__U to <8 x i1>
975 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
976 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
980 define <4 x i64> @test_mm256_shrdi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
981 ; CHECK-LABEL: test_mm256_shrdi_epi64:
982 ; CHECK: # %bb.0: # %entry
983 ; CHECK-NEXT: vpshrdq $31, %ymm1, %ymm0, %ymm0
984 ; CHECK-NEXT: ret{{[l|q]}}
986 %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 31)
990 define <2 x i64> @test_mm_mask_shrdi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
991 ; X86-LABEL: test_mm_mask_shrdi_epi64:
992 ; X86: # %bb.0: # %entry
993 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
994 ; X86-NEXT: kmovd %eax, %k1
995 ; X86-NEXT: vpshrdq $127, %xmm2, %xmm1, %xmm0 {%k1}
998 ; X64-LABEL: test_mm_mask_shrdi_epi64:
999 ; X64: # %bb.0: # %entry
1000 ; X64-NEXT: kmovd %edi, %k1
1001 ; X64-NEXT: vpshrdq $127, %xmm2, %xmm1, %xmm0 {%k1}
1004 %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 127)
1005 %1 = bitcast i8 %__U to <8 x i1>
1006 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1007 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
1011 declare <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64>, <2 x i64>, i32)
1013 define <2 x i64> @test_mm_maskz_shrdi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1014 ; X86-LABEL: test_mm_maskz_shrdi_epi64:
1015 ; X86: # %bb.0: # %entry
1016 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1017 ; X86-NEXT: kmovd %eax, %k1
1018 ; X86-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1021 ; X64-LABEL: test_mm_maskz_shrdi_epi64:
1022 ; X64: # %bb.0: # %entry
1023 ; X64-NEXT: kmovd %edi, %k1
1024 ; X64-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1027 %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 63)
1028 %1 = bitcast i8 %__U to <8 x i1>
1029 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1030 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
1034 define <2 x i64> @test_mm_shrdi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
1035 ; CHECK-LABEL: test_mm_shrdi_epi64:
1036 ; CHECK: # %bb.0: # %entry
1037 ; CHECK-NEXT: vpshrdq $31, %xmm1, %xmm0, %xmm0
1038 ; CHECK-NEXT: ret{{[l|q]}}
1040 %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 31)
1044 define <4 x i64> @test_mm256_mask_shrdi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1045 ; X86-LABEL: test_mm256_mask_shrdi_epi32:
1046 ; X86: # %bb.0: # %entry
1047 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1048 ; X86-NEXT: kmovd %eax, %k1
1049 ; X86-NEXT: vpshrdd $127, %ymm2, %ymm1, %ymm0 {%k1}
1052 ; X64-LABEL: test_mm256_mask_shrdi_epi32:
1053 ; X64: # %bb.0: # %entry
1054 ; X64-NEXT: kmovd %edi, %k1
1055 ; X64-NEXT: vpshrdd $127, %ymm2, %ymm1, %ymm0 {%k1}
1058 %0 = bitcast <4 x i64> %__A to <8 x i32>
1059 %1 = bitcast <4 x i64> %__B to <8 x i32>
1060 %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 127)
1061 %3 = bitcast <4 x i64> %__S to <8 x i32>
1062 %4 = bitcast i8 %__U to <8 x i1>
1063 %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
1064 %6 = bitcast <8 x i32> %5 to <4 x i64>
1068 declare <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32>, <8 x i32>, i32)
1070 define <4 x i64> @test_mm256_maskz_shrdi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1071 ; X86-LABEL: test_mm256_maskz_shrdi_epi32:
1072 ; X86: # %bb.0: # %entry
1073 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1074 ; X86-NEXT: kmovd %eax, %k1
1075 ; X86-NEXT: vpshrdd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
1078 ; X64-LABEL: test_mm256_maskz_shrdi_epi32:
1079 ; X64: # %bb.0: # %entry
1080 ; X64-NEXT: kmovd %edi, %k1
1081 ; X64-NEXT: vpshrdd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
1084 %0 = bitcast <4 x i64> %__A to <8 x i32>
1085 %1 = bitcast <4 x i64> %__B to <8 x i32>
1086 %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 63)
1087 %3 = bitcast i8 %__U to <8 x i1>
1088 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
1089 %5 = bitcast <8 x i32> %4 to <4 x i64>
1093 define <4 x i64> @test_mm256_shrdi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
1094 ; CHECK-LABEL: test_mm256_shrdi_epi32:
1095 ; CHECK: # %bb.0: # %entry
1096 ; CHECK-NEXT: vpshrdd $31, %ymm1, %ymm0, %ymm0
1097 ; CHECK-NEXT: ret{{[l|q]}}
1099 %0 = bitcast <4 x i64> %__A to <8 x i32>
1100 %1 = bitcast <4 x i64> %__B to <8 x i32>
1101 %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 31)
1102 %3 = bitcast <8 x i32> %2 to <4 x i64>
1106 define <2 x i64> @test_mm_mask_shrdi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1107 ; X86-LABEL: test_mm_mask_shrdi_epi32:
1108 ; X86: # %bb.0: # %entry
1109 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1110 ; X86-NEXT: kmovd %eax, %k1
1111 ; X86-NEXT: vpshrdd $127, %xmm2, %xmm1, %xmm0 {%k1}
1114 ; X64-LABEL: test_mm_mask_shrdi_epi32:
1115 ; X64: # %bb.0: # %entry
1116 ; X64-NEXT: kmovd %edi, %k1
1117 ; X64-NEXT: vpshrdd $127, %xmm2, %xmm1, %xmm0 {%k1}
1120 %0 = bitcast <2 x i64> %__A to <4 x i32>
1121 %1 = bitcast <2 x i64> %__B to <4 x i32>
1122 %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 127)
1123 %3 = bitcast <2 x i64> %__S to <4 x i32>
1124 %4 = bitcast i8 %__U to <8 x i1>
1125 %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1126 %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
1127 %6 = bitcast <4 x i32> %5 to <2 x i64>
1131 declare <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32>, <4 x i32>, i32)
1133 define <2 x i64> @test_mm_maskz_shrdi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1134 ; X86-LABEL: test_mm_maskz_shrdi_epi32:
1135 ; X86: # %bb.0: # %entry
1136 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1137 ; X86-NEXT: kmovd %eax, %k1
1138 ; X86-NEXT: vpshrdd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1141 ; X64-LABEL: test_mm_maskz_shrdi_epi32:
1142 ; X64: # %bb.0: # %entry
1143 ; X64-NEXT: kmovd %edi, %k1
1144 ; X64-NEXT: vpshrdd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1147 %0 = bitcast <2 x i64> %__A to <4 x i32>
1148 %1 = bitcast <2 x i64> %__B to <4 x i32>
1149 %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 63)
1150 %3 = bitcast i8 %__U to <8 x i1>
1151 %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1152 %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
1153 %5 = bitcast <4 x i32> %4 to <2 x i64>
1157 define <2 x i64> @test_mm_shrdi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
1158 ; CHECK-LABEL: test_mm_shrdi_epi32:
1159 ; CHECK: # %bb.0: # %entry
1160 ; CHECK-NEXT: vpshrdd $31, %xmm1, %xmm0, %xmm0
1161 ; CHECK-NEXT: ret{{[l|q]}}
1163 %0 = bitcast <2 x i64> %__A to <4 x i32>
1164 %1 = bitcast <2 x i64> %__B to <4 x i32>
1165 %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 31)
1166 %3 = bitcast <4 x i32> %2 to <2 x i64>
1170 define <4 x i64> @test_mm256_mask_shrdi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1171 ; X86-LABEL: test_mm256_mask_shrdi_epi16:
1172 ; X86: # %bb.0: # %entry
1173 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1174 ; X86-NEXT: vpshrdw $127, %ymm2, %ymm1, %ymm0 {%k1}
1177 ; X64-LABEL: test_mm256_mask_shrdi_epi16:
1178 ; X64: # %bb.0: # %entry
1179 ; X64-NEXT: kmovd %edi, %k1
1180 ; X64-NEXT: vpshrdw $127, %ymm2, %ymm1, %ymm0 {%k1}
1183 %0 = bitcast <4 x i64> %__A to <16 x i16>
1184 %1 = bitcast <4 x i64> %__B to <16 x i16>
1185 %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 127)
1186 %3 = bitcast <4 x i64> %__S to <16 x i16>
1187 %4 = bitcast i16 %__U to <16 x i1>
1188 %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
1189 %6 = bitcast <16 x i16> %5 to <4 x i64>
1193 declare <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16>, <16 x i16>, i32)
1195 define <4 x i64> @test_mm256_maskz_shrdi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1196 ; X86-LABEL: test_mm256_maskz_shrdi_epi16:
1197 ; X86: # %bb.0: # %entry
1198 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1199 ; X86-NEXT: vpshrdw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
1202 ; X64-LABEL: test_mm256_maskz_shrdi_epi16:
1203 ; X64: # %bb.0: # %entry
1204 ; X64-NEXT: kmovd %edi, %k1
1205 ; X64-NEXT: vpshrdw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
1208 %0 = bitcast <4 x i64> %__A to <16 x i16>
1209 %1 = bitcast <4 x i64> %__B to <16 x i16>
1210 %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 63)
1211 %3 = bitcast i16 %__U to <16 x i1>
1212 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
1213 %5 = bitcast <16 x i16> %4 to <4 x i64>
1217 define <4 x i64> @test_mm256_shrdi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
1218 ; CHECK-LABEL: test_mm256_shrdi_epi16:
1219 ; CHECK: # %bb.0: # %entry
1220 ; CHECK-NEXT: vpshrdw $31, %ymm1, %ymm0, %ymm0
1221 ; CHECK-NEXT: ret{{[l|q]}}
1223 %0 = bitcast <4 x i64> %__A to <16 x i16>
1224 %1 = bitcast <4 x i64> %__B to <16 x i16>
1225 %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 31)
1226 %3 = bitcast <16 x i16> %2 to <4 x i64>
1230 define <2 x i64> @test_mm_mask_shrdi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1231 ; X86-LABEL: test_mm_mask_shrdi_epi16:
1232 ; X86: # %bb.0: # %entry
1233 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1234 ; X86-NEXT: kmovd %eax, %k1
1235 ; X86-NEXT: vpshrdw $127, %xmm2, %xmm1, %xmm0 {%k1}
1238 ; X64-LABEL: test_mm_mask_shrdi_epi16:
1239 ; X64: # %bb.0: # %entry
1240 ; X64-NEXT: kmovd %edi, %k1
1241 ; X64-NEXT: vpshrdw $127, %xmm2, %xmm1, %xmm0 {%k1}
1244 %0 = bitcast <2 x i64> %__A to <8 x i16>
1245 %1 = bitcast <2 x i64> %__B to <8 x i16>
1246 %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 127)
1247 %3 = bitcast <2 x i64> %__S to <8 x i16>
1248 %4 = bitcast i8 %__U to <8 x i1>
1249 %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
1250 %6 = bitcast <8 x i16> %5 to <2 x i64>
1254 declare <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16>, <8 x i16>, i32)
1256 define <2 x i64> @test_mm_maskz_shrdi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1257 ; X86-LABEL: test_mm_maskz_shrdi_epi16:
1258 ; X86: # %bb.0: # %entry
1259 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1260 ; X86-NEXT: kmovd %eax, %k1
1261 ; X86-NEXT: vpshrdw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1264 ; X64-LABEL: test_mm_maskz_shrdi_epi16:
1265 ; X64: # %bb.0: # %entry
1266 ; X64-NEXT: kmovd %edi, %k1
1267 ; X64-NEXT: vpshrdw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1270 %0 = bitcast <2 x i64> %__A to <8 x i16>
1271 %1 = bitcast <2 x i64> %__B to <8 x i16>
1272 %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 63)
1273 %3 = bitcast i8 %__U to <8 x i1>
1274 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
1275 %5 = bitcast <8 x i16> %4 to <2 x i64>
1279 define <2 x i64> @test_mm_shrdi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
1280 ; CHECK-LABEL: test_mm_shrdi_epi16:
1281 ; CHECK: # %bb.0: # %entry
1282 ; CHECK-NEXT: vpshrdw $31, %xmm1, %xmm0, %xmm0
1283 ; CHECK-NEXT: ret{{[l|q]}}
1285 %0 = bitcast <2 x i64> %__A to <8 x i16>
1286 %1 = bitcast <2 x i64> %__B to <8 x i16>
1287 %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 31)
1288 %3 = bitcast <8 x i16> %2 to <2 x i64>
1292 define <4 x i64> @test_mm256_mask_shldv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1293 ; X86-LABEL: test_mm256_mask_shldv_epi64:
1294 ; X86: # %bb.0: # %entry
1295 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1296 ; X86-NEXT: kmovd %eax, %k1
1297 ; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
1300 ; X64-LABEL: test_mm256_mask_shldv_epi64:
1301 ; X64: # %bb.0: # %entry
1302 ; X64-NEXT: kmovd %edi, %k1
1303 ; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
1306 %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
1310 define <4 x i64> @test_mm256_maskz_shldv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1311 ; X86-LABEL: test_mm256_maskz_shldv_epi64:
1312 ; X86: # %bb.0: # %entry
1313 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1314 ; X86-NEXT: kmovd %eax, %k1
1315 ; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
1318 ; X64-LABEL: test_mm256_maskz_shldv_epi64:
1319 ; X64: # %bb.0: # %entry
1320 ; X64-NEXT: kmovd %edi, %k1
1321 ; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
1324 %0 = tail call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
1328 define <4 x i64> @test_mm256_shldv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1329 ; CHECK-LABEL: test_mm256_shldv_epi64:
1330 ; CHECK: # %bb.0: # %entry
1331 ; CHECK-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
1332 ; CHECK-NEXT: ret{{[l|q]}}
1334 %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 -1)
1338 define <2 x i64> @test_mm_mask_shldv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1339 ; X86-LABEL: test_mm_mask_shldv_epi64:
1340 ; X86: # %bb.0: # %entry
1341 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1342 ; X86-NEXT: kmovd %eax, %k1
1343 ; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
1346 ; X64-LABEL: test_mm_mask_shldv_epi64:
1347 ; X64: # %bb.0: # %entry
1348 ; X64-NEXT: kmovd %edi, %k1
1349 ; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
1352 %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
1356 define <2 x i64> @test_mm_maskz_shldv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1357 ; X86-LABEL: test_mm_maskz_shldv_epi64:
1358 ; X86: # %bb.0: # %entry
1359 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1360 ; X86-NEXT: kmovd %eax, %k1
1361 ; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
1364 ; X64-LABEL: test_mm_maskz_shldv_epi64:
1365 ; X64: # %bb.0: # %entry
1366 ; X64-NEXT: kmovd %edi, %k1
1367 ; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
1370 %0 = tail call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
1374 define <2 x i64> @test_mm_shldv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1375 ; CHECK-LABEL: test_mm_shldv_epi64:
1376 ; CHECK: # %bb.0: # %entry
1377 ; CHECK-NEXT: vpshldvq %xmm2, %xmm1, %xmm0
1378 ; CHECK-NEXT: ret{{[l|q]}}
1380 %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 -1)
1384 define <4 x i64> @test_mm256_mask_shldv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1385 ; X86-LABEL: test_mm256_mask_shldv_epi32:
1386 ; X86: # %bb.0: # %entry
1387 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1388 ; X86-NEXT: kmovd %eax, %k1
1389 ; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
1392 ; X64-LABEL: test_mm256_mask_shldv_epi32:
1393 ; X64: # %bb.0: # %entry
1394 ; X64-NEXT: kmovd %edi, %k1
1395 ; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
1398 %0 = bitcast <4 x i64> %__S to <8 x i32>
1399 %1 = bitcast <4 x i64> %__A to <8 x i32>
1400 %2 = bitcast <4 x i64> %__B to <8 x i32>
1401 %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
1402 %4 = bitcast <8 x i32> %3 to <4 x i64>
1406 define <4 x i64> @test_mm256_maskz_shldv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1407 ; X86-LABEL: test_mm256_maskz_shldv_epi32:
1408 ; X86: # %bb.0: # %entry
1409 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1410 ; X86-NEXT: kmovd %eax, %k1
1411 ; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
1414 ; X64-LABEL: test_mm256_maskz_shldv_epi32:
1415 ; X64: # %bb.0: # %entry
1416 ; X64-NEXT: kmovd %edi, %k1
1417 ; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
1420 %0 = bitcast <4 x i64> %__S to <8 x i32>
1421 %1 = bitcast <4 x i64> %__A to <8 x i32>
1422 %2 = bitcast <4 x i64> %__B to <8 x i32>
1423 %3 = tail call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
1424 %4 = bitcast <8 x i32> %3 to <4 x i64>
1428 define <4 x i64> @test_mm256_shldv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1429 ; CHECK-LABEL: test_mm256_shldv_epi32:
1430 ; CHECK: # %bb.0: # %entry
1431 ; CHECK-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
1432 ; CHECK-NEXT: ret{{[l|q]}}
1434 %0 = bitcast <4 x i64> %__S to <8 x i32>
1435 %1 = bitcast <4 x i64> %__A to <8 x i32>
1436 %2 = bitcast <4 x i64> %__B to <8 x i32>
1437 %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 -1)
1438 %4 = bitcast <8 x i32> %3 to <4 x i64>
1442 define <2 x i64> @test_mm_mask_shldv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1443 ; X86-LABEL: test_mm_mask_shldv_epi32:
1444 ; X86: # %bb.0: # %entry
1445 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1446 ; X86-NEXT: kmovd %eax, %k1
1447 ; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
1450 ; X64-LABEL: test_mm_mask_shldv_epi32:
1451 ; X64: # %bb.0: # %entry
1452 ; X64-NEXT: kmovd %edi, %k1
1453 ; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
1456 %0 = bitcast <2 x i64> %__S to <4 x i32>
1457 %1 = bitcast <2 x i64> %__A to <4 x i32>
1458 %2 = bitcast <2 x i64> %__B to <4 x i32>
1459 %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
1460 %4 = bitcast <4 x i32> %3 to <2 x i64>
1464 define <2 x i64> @test_mm_maskz_shldv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1465 ; X86-LABEL: test_mm_maskz_shldv_epi32:
1466 ; X86: # %bb.0: # %entry
1467 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1468 ; X86-NEXT: kmovd %eax, %k1
1469 ; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
1472 ; X64-LABEL: test_mm_maskz_shldv_epi32:
1473 ; X64: # %bb.0: # %entry
1474 ; X64-NEXT: kmovd %edi, %k1
1475 ; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
1478 %0 = bitcast <2 x i64> %__S to <4 x i32>
1479 %1 = bitcast <2 x i64> %__A to <4 x i32>
1480 %2 = bitcast <2 x i64> %__B to <4 x i32>
1481 %3 = tail call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
1482 %4 = bitcast <4 x i32> %3 to <2 x i64>
1486 define <2 x i64> @test_mm_shldv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1487 ; CHECK-LABEL: test_mm_shldv_epi32:
1488 ; CHECK: # %bb.0: # %entry
1489 ; CHECK-NEXT: vpshldvd %xmm2, %xmm1, %xmm0
1490 ; CHECK-NEXT: ret{{[l|q]}}
1492 %0 = bitcast <2 x i64> %__S to <4 x i32>
1493 %1 = bitcast <2 x i64> %__A to <4 x i32>
1494 %2 = bitcast <2 x i64> %__B to <4 x i32>
1495 %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 -1)
1496 %4 = bitcast <4 x i32> %3 to <2 x i64>
1500 define <4 x i64> @test_mm256_mask_shldv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1501 ; X86-LABEL: test_mm256_mask_shldv_epi16:
1502 ; X86: # %bb.0: # %entry
1503 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1504 ; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
1507 ; X64-LABEL: test_mm256_mask_shldv_epi16:
1508 ; X64: # %bb.0: # %entry
1509 ; X64-NEXT: kmovd %edi, %k1
1510 ; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
1513 %0 = bitcast <4 x i64> %__S to <16 x i16>
1514 %1 = bitcast <4 x i64> %__A to <16 x i16>
1515 %2 = bitcast <4 x i64> %__B to <16 x i16>
1516 %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
1517 %4 = bitcast <16 x i16> %3 to <4 x i64>
1521 define <4 x i64> @test_mm256_maskz_shldv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1522 ; X86-LABEL: test_mm256_maskz_shldv_epi16:
1523 ; X86: # %bb.0: # %entry
1524 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1525 ; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
1528 ; X64-LABEL: test_mm256_maskz_shldv_epi16:
1529 ; X64: # %bb.0: # %entry
1530 ; X64-NEXT: kmovd %edi, %k1
1531 ; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
1534 %0 = bitcast <4 x i64> %__S to <16 x i16>
1535 %1 = bitcast <4 x i64> %__A to <16 x i16>
1536 %2 = bitcast <4 x i64> %__B to <16 x i16>
1537 %3 = tail call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
1538 %4 = bitcast <16 x i16> %3 to <4 x i64>
1542 define <4 x i64> @test_mm256_shldv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1543 ; CHECK-LABEL: test_mm256_shldv_epi16:
1544 ; CHECK: # %bb.0: # %entry
1545 ; CHECK-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
1546 ; CHECK-NEXT: ret{{[l|q]}}
1548 %0 = bitcast <4 x i64> %__S to <16 x i16>
1549 %1 = bitcast <4 x i64> %__A to <16 x i16>
1550 %2 = bitcast <4 x i64> %__B to <16 x i16>
1551 %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 -1)
1552 %4 = bitcast <16 x i16> %3 to <4 x i64>
1556 define <2 x i64> @test_mm_mask_shldv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1557 ; X86-LABEL: test_mm_mask_shldv_epi16:
1558 ; X86: # %bb.0: # %entry
1559 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1560 ; X86-NEXT: kmovd %eax, %k1
1561 ; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
1564 ; X64-LABEL: test_mm_mask_shldv_epi16:
1565 ; X64: # %bb.0: # %entry
1566 ; X64-NEXT: kmovd %edi, %k1
1567 ; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
1570 %0 = bitcast <2 x i64> %__S to <8 x i16>
1571 %1 = bitcast <2 x i64> %__A to <8 x i16>
1572 %2 = bitcast <2 x i64> %__B to <8 x i16>
1573 %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
1574 %4 = bitcast <8 x i16> %3 to <2 x i64>
1578 define <2 x i64> @test_mm_maskz_shldv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1579 ; X86-LABEL: test_mm_maskz_shldv_epi16:
1580 ; X86: # %bb.0: # %entry
1581 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1582 ; X86-NEXT: kmovd %eax, %k1
1583 ; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
1586 ; X64-LABEL: test_mm_maskz_shldv_epi16:
1587 ; X64: # %bb.0: # %entry
1588 ; X64-NEXT: kmovd %edi, %k1
1589 ; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
1592 %0 = bitcast <2 x i64> %__S to <8 x i16>
1593 %1 = bitcast <2 x i64> %__A to <8 x i16>
1594 %2 = bitcast <2 x i64> %__B to <8 x i16>
1595 %3 = tail call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
1596 %4 = bitcast <8 x i16> %3 to <2 x i64>
1600 define <2 x i64> @test_mm_shldv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1601 ; CHECK-LABEL: test_mm_shldv_epi16:
1602 ; CHECK: # %bb.0: # %entry
1603 ; CHECK-NEXT: vpshldvw %xmm2, %xmm1, %xmm0
1604 ; CHECK-NEXT: ret{{[l|q]}}
1606 %0 = bitcast <2 x i64> %__S to <8 x i16>
1607 %1 = bitcast <2 x i64> %__A to <8 x i16>
1608 %2 = bitcast <2 x i64> %__B to <8 x i16>
1609 %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 -1)
1610 %4 = bitcast <8 x i16> %3 to <2 x i64>
1614 define <4 x i64> @test_mm256_mask_shrdv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1615 ; X86-LABEL: test_mm256_mask_shrdv_epi64:
1616 ; X86: # %bb.0: # %entry
1617 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1618 ; X86-NEXT: kmovd %eax, %k1
1619 ; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
1620 ; X86-NEXT: retl
1621 ;
1622 ; X64-LABEL: test_mm256_mask_shrdv_epi64:
1623 ; X64: # %bb.0: # %entry
1624 ; X64-NEXT: kmovd %edi, %k1
1625 ; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
1626 ; X64-NEXT: retq
1627 entry:
1628 %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
1629 ret <4 x i64> %0
1630 }
1632 define <4 x i64> @test_mm256_maskz_shrdv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1633 ; X86-LABEL: test_mm256_maskz_shrdv_epi64:
1634 ; X86: # %bb.0: # %entry
1635 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1636 ; X86-NEXT: kmovd %eax, %k1
1637 ; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
1638 ; X86-NEXT: retl
1639 ;
1640 ; X64-LABEL: test_mm256_maskz_shrdv_epi64:
1641 ; X64: # %bb.0: # %entry
1642 ; X64-NEXT: kmovd %edi, %k1
1643 ; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
1644 ; X64-NEXT: retq
1645 entry:
1646 %0 = tail call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 %__U)
1647 ret <4 x i64> %0
1648 }
1650 define <4 x i64> @test_mm256_shrdv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1651 ; CHECK-LABEL: test_mm256_shrdv_epi64:
1652 ; CHECK: # %bb.0: # %entry
1653 ; CHECK-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0
1654 ; CHECK-NEXT: ret{{[l|q]}}
1655 entry:
1656 %0 = tail call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B, i8 -1)
1657 ret <4 x i64> %0
1658 }
1660 define <2 x i64> @test_mm_mask_shrdv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1661 ; X86-LABEL: test_mm_mask_shrdv_epi64:
1662 ; X86: # %bb.0: # %entry
1663 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1664 ; X86-NEXT: kmovd %eax, %k1
1665 ; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1}
1666 ; X86-NEXT: retl
1667 ;
1668 ; X64-LABEL: test_mm_mask_shrdv_epi64:
1669 ; X64: # %bb.0: # %entry
1670 ; X64-NEXT: kmovd %edi, %k1
1671 ; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1}
1672 ; X64-NEXT: retq
1673 entry:
1674 %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
1675 ret <2 x i64> %0
1676 }
1678 define <2 x i64> @test_mm_maskz_shrdv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1679 ; X86-LABEL: test_mm_maskz_shrdv_epi64:
1680 ; X86: # %bb.0: # %entry
1681 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1682 ; X86-NEXT: kmovd %eax, %k1
1683 ; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z}
1684 ; X86-NEXT: retl
1685 ;
1686 ; X64-LABEL: test_mm_maskz_shrdv_epi64:
1687 ; X64: # %bb.0: # %entry
1688 ; X64-NEXT: kmovd %edi, %k1
1689 ; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z}
1690 ; X64-NEXT: retq
1691 entry:
1692 %0 = tail call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 %__U)
1693 ret <2 x i64> %0
1694 }
1696 define <2 x i64> @test_mm_shrdv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1697 ; CHECK-LABEL: test_mm_shrdv_epi64:
1698 ; CHECK: # %bb.0: # %entry
1699 ; CHECK-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0
1700 ; CHECK-NEXT: ret{{[l|q]}}
1701 entry:
1702 %0 = tail call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B, i8 -1)
1703 ret <2 x i64> %0
1704 }
1706 define <4 x i64> @test_mm256_mask_shrdv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1707 ; X86-LABEL: test_mm256_mask_shrdv_epi32:
1708 ; X86: # %bb.0: # %entry
1709 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1710 ; X86-NEXT: kmovd %eax, %k1
1711 ; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1}
1712 ; X86-NEXT: retl
1713 ;
1714 ; X64-LABEL: test_mm256_mask_shrdv_epi32:
1715 ; X64: # %bb.0: # %entry
1716 ; X64-NEXT: kmovd %edi, %k1
1717 ; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1}
1718 ; X64-NEXT: retq
1719 entry:
1720 %0 = bitcast <4 x i64> %__S to <8 x i32>
1721 %1 = bitcast <4 x i64> %__A to <8 x i32>
1722 %2 = bitcast <4 x i64> %__B to <8 x i32>
1723 %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
1724 %4 = bitcast <8 x i32> %3 to <4 x i64>
1725 ret <4 x i64> %4
1726 }
1728 define <4 x i64> @test_mm256_maskz_shrdv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1729 ; X86-LABEL: test_mm256_maskz_shrdv_epi32:
1730 ; X86: # %bb.0: # %entry
1731 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1732 ; X86-NEXT: kmovd %eax, %k1
1733 ; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z}
1734 ; X86-NEXT: retl
1735 ;
1736 ; X64-LABEL: test_mm256_maskz_shrdv_epi32:
1737 ; X64: # %bb.0: # %entry
1738 ; X64-NEXT: kmovd %edi, %k1
1739 ; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z}
1740 ; X64-NEXT: retq
1741 entry:
1742 %0 = bitcast <4 x i64> %__S to <8 x i32>
1743 %1 = bitcast <4 x i64> %__A to <8 x i32>
1744 %2 = bitcast <4 x i64> %__B to <8 x i32>
1745 %3 = tail call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 %__U)
1746 %4 = bitcast <8 x i32> %3 to <4 x i64>
1747 ret <4 x i64> %4
1748 }
1750 define <4 x i64> @test_mm256_shrdv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1751 ; CHECK-LABEL: test_mm256_shrdv_epi32:
1752 ; CHECK: # %bb.0: # %entry
1753 ; CHECK-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0
1754 ; CHECK-NEXT: ret{{[l|q]}}
1755 entry:
1756 %0 = bitcast <4 x i64> %__S to <8 x i32>
1757 %1 = bitcast <4 x i64> %__A to <8 x i32>
1758 %2 = bitcast <4 x i64> %__B to <8 x i32>
1759 %3 = tail call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i8 -1)
1760 %4 = bitcast <8 x i32> %3 to <4 x i64>
1761 ret <4 x i64> %4
1762 }
1764 define <2 x i64> @test_mm_mask_shrdv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1765 ; X86-LABEL: test_mm_mask_shrdv_epi32:
1766 ; X86: # %bb.0: # %entry
1767 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1768 ; X86-NEXT: kmovd %eax, %k1
1769 ; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1}
1770 ; X86-NEXT: retl
1771 ;
1772 ; X64-LABEL: test_mm_mask_shrdv_epi32:
1773 ; X64: # %bb.0: # %entry
1774 ; X64-NEXT: kmovd %edi, %k1
1775 ; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1}
1776 ; X64-NEXT: retq
1777 entry:
1778 %0 = bitcast <2 x i64> %__S to <4 x i32>
1779 %1 = bitcast <2 x i64> %__A to <4 x i32>
1780 %2 = bitcast <2 x i64> %__B to <4 x i32>
1781 %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
1782 %4 = bitcast <4 x i32> %3 to <2 x i64>
1783 ret <2 x i64> %4
1784 }
1786 define <2 x i64> @test_mm_maskz_shrdv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1787 ; X86-LABEL: test_mm_maskz_shrdv_epi32:
1788 ; X86: # %bb.0: # %entry
1789 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1790 ; X86-NEXT: kmovd %eax, %k1
1791 ; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z}
1792 ; X86-NEXT: retl
1793 ;
1794 ; X64-LABEL: test_mm_maskz_shrdv_epi32:
1795 ; X64: # %bb.0: # %entry
1796 ; X64-NEXT: kmovd %edi, %k1
1797 ; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z}
1798 ; X64-NEXT: retq
1799 entry:
1800 %0 = bitcast <2 x i64> %__S to <4 x i32>
1801 %1 = bitcast <2 x i64> %__A to <4 x i32>
1802 %2 = bitcast <2 x i64> %__B to <4 x i32>
1803 %3 = tail call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 %__U)
1804 %4 = bitcast <4 x i32> %3 to <2 x i64>
1805 ret <2 x i64> %4
1806 }
1808 define <2 x i64> @test_mm_shrdv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1809 ; CHECK-LABEL: test_mm_shrdv_epi32:
1810 ; CHECK: # %bb.0: # %entry
1811 ; CHECK-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0
1812 ; CHECK-NEXT: ret{{[l|q]}}
1813 entry:
1814 %0 = bitcast <2 x i64> %__S to <4 x i32>
1815 %1 = bitcast <2 x i64> %__A to <4 x i32>
1816 %2 = bitcast <2 x i64> %__B to <4 x i32>
1817 %3 = tail call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i8 -1)
1818 %4 = bitcast <4 x i32> %3 to <2 x i64>
1819 ret <2 x i64> %4
1820 }
1822 define <4 x i64> @test_mm256_mask_shrdv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1823 ; X86-LABEL: test_mm256_mask_shrdv_epi16:
1824 ; X86: # %bb.0: # %entry
1825 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1826 ; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1}
1827 ; X86-NEXT: retl
1828 ;
1829 ; X64-LABEL: test_mm256_mask_shrdv_epi16:
1830 ; X64: # %bb.0: # %entry
1831 ; X64-NEXT: kmovd %edi, %k1
1832 ; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1}
1833 ; X64-NEXT: retq
1834 entry:
1835 %0 = bitcast <4 x i64> %__S to <16 x i16>
1836 %1 = bitcast <4 x i64> %__A to <16 x i16>
1837 %2 = bitcast <4 x i64> %__B to <16 x i16>
1838 %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
1839 %4 = bitcast <16 x i16> %3 to <4 x i64>
1840 ret <4 x i64> %4
1841 }
1843 define <4 x i64> @test_mm256_maskz_shrdv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1844 ; X86-LABEL: test_mm256_maskz_shrdv_epi16:
1845 ; X86: # %bb.0: # %entry
1846 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1847 ; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z}
1848 ; X86-NEXT: retl
1849 ;
1850 ; X64-LABEL: test_mm256_maskz_shrdv_epi16:
1851 ; X64: # %bb.0: # %entry
1852 ; X64-NEXT: kmovd %edi, %k1
1853 ; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z}
1854 ; X64-NEXT: retq
1855 entry:
1856 %0 = bitcast <4 x i64> %__S to <16 x i16>
1857 %1 = bitcast <4 x i64> %__A to <16 x i16>
1858 %2 = bitcast <4 x i64> %__B to <16 x i16>
1859 %3 = tail call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 %__U)
1860 %4 = bitcast <16 x i16> %3 to <4 x i64>
1861 ret <4 x i64> %4
1862 }
1864 define <4 x i64> @test_mm256_shrdv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1865 ; CHECK-LABEL: test_mm256_shrdv_epi16:
1866 ; CHECK: # %bb.0: # %entry
1867 ; CHECK-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0
1868 ; CHECK-NEXT: ret{{[l|q]}}
1869 entry:
1870 %0 = bitcast <4 x i64> %__S to <16 x i16>
1871 %1 = bitcast <4 x i64> %__A to <16 x i16>
1872 %2 = bitcast <4 x i64> %__B to <16 x i16>
1873 %3 = tail call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2, i16 -1)
1874 %4 = bitcast <16 x i16> %3 to <4 x i64>
1875 ret <4 x i64> %4
1876 }
1878 define <2 x i64> @test_mm_mask_shrdv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1879 ; X86-LABEL: test_mm_mask_shrdv_epi16:
1880 ; X86: # %bb.0: # %entry
1881 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1882 ; X86-NEXT: kmovd %eax, %k1
1883 ; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1}
1884 ; X86-NEXT: retl
1885 ;
1886 ; X64-LABEL: test_mm_mask_shrdv_epi16:
1887 ; X64: # %bb.0: # %entry
1888 ; X64-NEXT: kmovd %edi, %k1
1889 ; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1}
1890 ; X64-NEXT: retq
1891 entry:
1892 %0 = bitcast <2 x i64> %__S to <8 x i16>
1893 %1 = bitcast <2 x i64> %__A to <8 x i16>
1894 %2 = bitcast <2 x i64> %__B to <8 x i16>
1895 %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
1896 %4 = bitcast <8 x i16> %3 to <2 x i64>
1897 ret <2 x i64> %4
1898 }
1900 define <2 x i64> @test_mm_maskz_shrdv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1901 ; X86-LABEL: test_mm_maskz_shrdv_epi16:
1902 ; X86: # %bb.0: # %entry
1903 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
1904 ; X86-NEXT: kmovd %eax, %k1
1905 ; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z}
1906 ; X86-NEXT: retl
1907 ;
1908 ; X64-LABEL: test_mm_maskz_shrdv_epi16:
1909 ; X64: # %bb.0: # %entry
1910 ; X64-NEXT: kmovd %edi, %k1
1911 ; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z}
1912 ; X64-NEXT: retq
1913 entry:
1914 %0 = bitcast <2 x i64> %__S to <8 x i16>
1915 %1 = bitcast <2 x i64> %__A to <8 x i16>
1916 %2 = bitcast <2 x i64> %__B to <8 x i16>
1917 %3 = tail call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 %__U)
1918 %4 = bitcast <8 x i16> %3 to <2 x i64>
1919 ret <2 x i64> %4
1920 }
1922 define <2 x i64> @test_mm_shrdv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1923 ; CHECK-LABEL: test_mm_shrdv_epi16:
1924 ; CHECK: # %bb.0: # %entry
1925 ; CHECK-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0
1926 ; CHECK-NEXT: ret{{[l|q]}}
1927 entry:
1928 %0 = bitcast <2 x i64> %__S to <8 x i16>
1929 %1 = bitcast <2 x i64> %__A to <8 x i16>
1930 %2 = bitcast <2 x i64> %__B to <8 x i16>
1931 %3 = tail call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2, i8 -1)
1932 %4 = bitcast <8 x i16> %3 to <2 x i64>
1933 ret <2 x i64> %4
1934 }
1936 declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8)
1937 declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16)
1938 declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>)
1939 declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>)
1940 declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8)
1941 declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16)
1942 declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>)
1943 declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>)
1944 declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16)
1945 declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32)
1946 declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>)
1947 declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>)
1948 declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16)
1949 declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32)
1950 declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>)
1951 declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>)
1952 declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1953 declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1954 declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1955 declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1956 declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1957 declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1958 declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1959 declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1960 declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1961 declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1962 declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1963 declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1964 declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1965 declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1966 declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1967 declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1968 declare <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1969 declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1970 declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1971 declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1972 declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1973 declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1974 declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1975 declare <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)