; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi2-builtins.c

define <2 x i64> @test_mm_mask_compress_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compress_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpcompressw %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_compress_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressw %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <8 x i16>
%1 = bitcast <2 x i64> %__S to <8 x i16>
%2 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
%3 = bitcast <8 x i16> %2 to <2 x i64>
ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_compress_epi16(i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_compress_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_compress_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <8 x i16>
%1 = tail call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
%2 = bitcast <8 x i16> %1 to <2 x i64>
ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_compress_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compress_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressb %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_compress_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressb %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <16 x i8>
%1 = bitcast <2 x i64> %__S to <16 x i8>
%2 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
%3 = bitcast <16 x i8> %2 to <2 x i64>
ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_compress_epi8(i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_compress_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_compress_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <16 x i8>
%1 = tail call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
%2 = bitcast <16 x i8> %1 to <2 x i64>
ret <2 x i64> %2
}

define void @test_mm_mask_compressstoreu_epi16(ptr %__P, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compressstoreu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpcompressw %xmm0, (%ecx) {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_compressstoreu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %esi, %k1
; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <8 x i16>
%1 = bitcast i8 %__U to <8 x i1>
tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %0, ptr %__P, <8 x i1> %1)
ret void
}

define void @test_mm_mask_compressstoreu_epi8(ptr %__P, i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compressstoreu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_compressstoreu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %esi, %k1
; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <16 x i8>
%1 = bitcast i16 %__U to <16 x i1>
tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %0, ptr %__P, <16 x i1> %1)
ret void
}

define <2 x i64> @test_mm_mask_expand_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_expand_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpexpandw %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_expand_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <8 x i16>
%1 = bitcast <2 x i64> %__S to <8 x i16>
%2 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> %1, i8 %__U)
%3 = bitcast <8 x i16> %2 to <2 x i64>
ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expand_epi16(i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_expand_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_expand_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <8 x i16>
%1 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %0, <8 x i16> zeroinitializer, i8 %__U)
%2 = bitcast <8 x i16> %1 to <2 x i64>
ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_expand_epi8(<2 x i64> %__S, i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_expand_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_expand_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <16 x i8>
%1 = bitcast <2 x i64> %__S to <16 x i8>
%2 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> %1, i16 %__U)
%3 = bitcast <16 x i8> %2 to <2 x i64>
ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expand_epi8(i16 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_expand_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_expand_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__D to <16 x i8>
%1 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %0, <16 x i8> zeroinitializer, i16 %__U)
%2 = bitcast <16 x i8> %1 to <2 x i64>
ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_expandloadu_epi16(<2 x i64> %__S, i8 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovd %ecx, %k1
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_expandloadu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw (%rsi), %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__S to <8 x i16>
%1 = bitcast i8 %__U to <8 x i1>
%2 = tail call <8 x i16> @llvm.masked.expandload.v8i16(ptr %__P, <8 x i1> %1, <8 x i16> %0)
%3 = bitcast <8 x i16> %2 to <2 x i64>
ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovd %ecx, %k1
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_expandloadu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw (%rsi), %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast i8 %__U to <8 x i1>
%1 = tail call <8 x i16> @llvm.masked.expandload.v8i16(ptr %__P, <8 x i1> %0, <8 x i16> zeroinitializer)
%2 = bitcast <8 x i16> %1 to <2 x i64>
ret <2 x i64> %2
}

define <2 x i64> @test_mm_mask_expandloadu_epi8(<2 x i64> %__S, i16 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_expandloadu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb (%rsi), %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__S to <16 x i8>
%1 = bitcast i16 %__U to <16 x i1>
%2 = tail call <16 x i8> @llvm.masked.expandload.v16i8(ptr %__P, <16 x i1> %1, <16 x i8> %0)
%3 = bitcast <16 x i8> %2 to <2 x i64>
ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expandloadu_epi8(i16 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_expandloadu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb (%rsi), %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast i16 %__U to <16 x i1>
%1 = tail call <16 x i8> @llvm.masked.expandload.v16i8(ptr %__P, <16 x i1> %0, <16 x i8> zeroinitializer)
%2 = bitcast <16 x i8> %1 to <2 x i64>
ret <2 x i64> %2
}

define <4 x i64> @test_mm256_mask_compress_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compress_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressw %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_compress_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressw %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <16 x i16>
%1 = bitcast <4 x i64> %__S to <16 x i16>
%2 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
%3 = bitcast <16 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_compress_epi16(i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_compress_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_compress_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <16 x i16>
%1 = tail call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
%2 = bitcast <16 x i16> %1 to <4 x i64>
ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_compress_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compress_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressb %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_compress_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressb %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <32 x i8>
%1 = bitcast <4 x i64> %__S to <32 x i8>
%2 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
%3 = bitcast <32 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_compress_epi8(i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_compress_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_compress_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <32 x i8>
%1 = tail call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
%2 = bitcast <32 x i8> %1 to <4 x i64>
ret <4 x i64> %2
}

define void @test_mm256_mask_compressstoreu_epi16(ptr %__P, i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %esi, %k1
; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <16 x i16>
%1 = bitcast i16 %__U to <16 x i1>
tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %0, ptr %__P, <16 x i1> %1)
ret void
}

define void @test_mm256_mask_compressstoreu_epi8(ptr %__P, i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %esi, %k1
; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <32 x i8>
%1 = bitcast i32 %__U to <32 x i1>
tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %0, ptr %__P, <32 x i1> %1)
ret void
}

define <4 x i64> @test_mm256_mask_expand_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_expand_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandw %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_expand_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <16 x i16>
%1 = bitcast <4 x i64> %__S to <16 x i16>
%2 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> %1, i16 %__U)
%3 = bitcast <16 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expand_epi16(i16 zeroext %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_expand_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_expand_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <16 x i16>
%1 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %0, <16 x i16> zeroinitializer, i16 %__U)
%2 = bitcast <16 x i16> %1 to <4 x i64>
ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_expand_epi8(<4 x i64> %__S, i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_mask_expand_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_expand_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <32 x i8>
%1 = bitcast <4 x i64> %__S to <32 x i8>
%2 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> %1, i32 %__U)
%3 = bitcast <32 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expand_epi8(i32 %__U, <4 x i64> %__D) {
; X86-LABEL: test_mm256_maskz_expand_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_expand_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__D to <32 x i8>
%1 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %0, <32 x i8> zeroinitializer, i32 %__U)
%2 = bitcast <32 x i8> %1 to <4 x i64>
ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_expandloadu_epi16(<4 x i64> %__S, i16 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_expandloadu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw (%rsi), %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__S to <16 x i16>
%1 = bitcast i16 %__U to <16 x i1>
%2 = tail call <16 x i16> @llvm.masked.expandload.v16i16(ptr %__P, <16 x i1> %1, <16 x i16> %0)
%3 = bitcast <16 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi16(i16 zeroext %__U, ptr readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandw (%rsi), %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast i16 %__U to <16 x i1>
%1 = tail call <16 x i16> @llvm.masked.expandload.v16i16(ptr %__P, <16 x i1> %0, <16 x i16> zeroinitializer)
%2 = bitcast <16 x i16> %1 to <4 x i64>
ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_expandloadu_epi8(<4 x i64> %__S, i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_expandloadu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb (%rsi), %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__S to <32 x i8>
%1 = bitcast i32 %__U to <32 x i1>
%2 = tail call <32 x i8> @llvm.masked.expandload.v32i8(ptr %__P, <32 x i1> %1, <32 x i8> %0)
%3 = bitcast <32 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi8(i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_epi8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpexpandb (%rsi), %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast i32 %__U to <32 x i1>
%1 = tail call <32 x i8> @llvm.masked.expandload.v32i8(ptr %__P, <32 x i1> %0, <32 x i8> zeroinitializer)
%2 = bitcast <32 x i8> %1 to <4 x i64>
ret <4 x i64> %2
}

574 define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
575 ; X86-LABEL: test_mm256_mask_shldi_epi64:
576 ; X86: # %bb.0: # %entry
577 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
578 ; X86-NEXT: kmovd %eax, %k1
579 ; X86-NEXT: vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1}
582 ; X64-LABEL: test_mm256_mask_shldi_epi64:
583 ; X64: # %bb.0: # %entry
584 ; X64-NEXT: kmovd %edi, %k1
585 ; X64-NEXT: vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1}
588 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> <i64 47, i64 47, i64 47, i64 47>)
589 %1 = bitcast i8 %__U to <8 x i1>
590 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
591 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
595 declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
597 define <4 x i64> @test_mm256_maskz_shldi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
598 ; X86-LABEL: test_mm256_maskz_shldi_epi64:
599 ; X86: # %bb.0: # %entry
600 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
601 ; X86-NEXT: kmovd %eax, %k1
602 ; X86-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
605 ; X64-LABEL: test_mm256_maskz_shldi_epi64:
606 ; X64: # %bb.0: # %entry
607 ; X64-NEXT: kmovd %edi, %k1
608 ; X64-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
611 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> <i64 63, i64 63, i64 63, i64 63>)
612 %1 = bitcast i8 %__U to <8 x i1>
613 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
614 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
618 define <4 x i64> @test_mm256_shldi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
619 ; CHECK-LABEL: test_mm256_shldi_epi64:
620 ; CHECK: # %bb.0: # %entry
621 ; CHECK-NEXT: vpshldq $31, %ymm1, %ymm0, %ymm0
622 ; CHECK-NEXT: ret{{[l|q]}}
624 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> <i64 31, i64 31, i64 31, i64 31>)
628 define <2 x i64> @test_mm_mask_shldi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
629 ; X86-LABEL: test_mm_mask_shldi_epi64:
630 ; X86: # %bb.0: # %entry
631 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
632 ; X86-NEXT: kmovd %eax, %k1
633 ; X86-NEXT: vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1}
636 ; X64-LABEL: test_mm_mask_shldi_epi64:
637 ; X64: # %bb.0: # %entry
638 ; X64-NEXT: kmovd %edi, %k1
639 ; X64-NEXT: vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1}
642 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> <i64 47, i64 47>)
643 %1 = bitcast i8 %__U to <8 x i1>
644 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
645 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
649 declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
651 define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
652 ; X86-LABEL: test_mm_maskz_shldi_epi64:
653 ; X86: # %bb.0: # %entry
654 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
655 ; X86-NEXT: kmovd %eax, %k1
656 ; X86-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
659 ; X64-LABEL: test_mm_maskz_shldi_epi64:
660 ; X64: # %bb.0: # %entry
661 ; X64-NEXT: kmovd %edi, %k1
662 ; X64-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
665 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> <i64 63, i64 63>)
666 %1 = bitcast i8 %__U to <8 x i1>
667 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
668 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
672 define <2 x i64> @test_mm_shldi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
673 ; CHECK-LABEL: test_mm_shldi_epi64:
674 ; CHECK: # %bb.0: # %entry
675 ; CHECK-NEXT: vpshldq $31, %xmm1, %xmm0, %xmm0
676 ; CHECK-NEXT: ret{{[l|q]}}
678 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> <i64 31, i64 31>)
682 define <4 x i64> @test_mm256_mask_shldi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
683 ; X86-LABEL: test_mm256_mask_shldi_epi32:
684 ; X86: # %bb.0: # %entry
685 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
686 ; X86-NEXT: kmovd %eax, %k1
687 ; X86-NEXT: vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1}
690 ; X64-LABEL: test_mm256_mask_shldi_epi32:
691 ; X64: # %bb.0: # %entry
692 ; X64-NEXT: kmovd %edi, %k1
693 ; X64-NEXT: vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1}
696 %0 = bitcast <4 x i64> %__A to <8 x i32>
697 %1 = bitcast <4 x i64> %__B to <8 x i32>
698 %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
699 %3 = bitcast <4 x i64> %__S to <8 x i32>
700 %4 = bitcast i8 %__U to <8 x i1>
701 %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
702 %6 = bitcast <8 x i32> %5 to <4 x i64>
706 declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
708 define <4 x i64> @test_mm256_maskz_shldi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
709 ; X86-LABEL: test_mm256_maskz_shldi_epi32:
710 ; X86: # %bb.0: # %entry
711 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
712 ; X86-NEXT: kmovd %eax, %k1
713 ; X86-NEXT: vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
716 ; X64-LABEL: test_mm256_maskz_shldi_epi32:
717 ; X64: # %bb.0: # %entry
718 ; X64-NEXT: kmovd %edi, %k1
719 ; X64-NEXT: vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
722 %0 = bitcast <4 x i64> %__A to <8 x i32>
723 %1 = bitcast <4 x i64> %__B to <8 x i32>
724 %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
725 %3 = bitcast i8 %__U to <8 x i1>
726 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
727 %5 = bitcast <8 x i32> %4 to <4 x i64>
731 define <4 x i64> @test_mm256_shldi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
732 ; CHECK-LABEL: test_mm256_shldi_epi32:
733 ; CHECK: # %bb.0: # %entry
734 ; CHECK-NEXT: vpshldd $31, %ymm1, %ymm0, %ymm0
735 ; CHECK-NEXT: ret{{[l|q]}}
737 %0 = bitcast <4 x i64> %__A to <8 x i32>
738 %1 = bitcast <4 x i64> %__B to <8 x i32>
739 %2 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
740 %3 = bitcast <8 x i32> %2 to <4 x i64>
744 define <2 x i64> @test_mm_mask_shldi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
745 ; X86-LABEL: test_mm_mask_shldi_epi32:
746 ; X86: # %bb.0: # %entry
747 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
748 ; X86-NEXT: kmovd %eax, %k1
749 ; X86-NEXT: vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1}
752 ; X64-LABEL: test_mm_mask_shldi_epi32:
753 ; X64: # %bb.0: # %entry
754 ; X64-NEXT: kmovd %edi, %k1
755 ; X64-NEXT: vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1}
758 %0 = bitcast <2 x i64> %__A to <4 x i32>
759 %1 = bitcast <2 x i64> %__B to <4 x i32>
760 %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>)
761 %3 = bitcast <2 x i64> %__S to <4 x i32>
762 %4 = bitcast i8 %__U to <8 x i1>
763 %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
764 %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
765 %6 = bitcast <4 x i32> %5 to <2 x i64>
769 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
771 define <2 x i64> @test_mm_maskz_shldi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
772 ; X86-LABEL: test_mm_maskz_shldi_epi32:
773 ; X86: # %bb.0: # %entry
774 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
775 ; X86-NEXT: kmovd %eax, %k1
776 ; X86-NEXT: vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
779 ; X64-LABEL: test_mm_maskz_shldi_epi32:
780 ; X64: # %bb.0: # %entry
781 ; X64-NEXT: kmovd %edi, %k1
782 ; X64-NEXT: vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
785 %0 = bitcast <2 x i64> %__A to <4 x i32>
786 %1 = bitcast <2 x i64> %__B to <4 x i32>
787 %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
788 %3 = bitcast i8 %__U to <8 x i1>
789 %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
790 %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
791 %5 = bitcast <4 x i32> %4 to <2 x i64>
795 define <2 x i64> @test_mm_shldi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
796 ; CHECK-LABEL: test_mm_shldi_epi32:
797 ; CHECK: # %bb.0: # %entry
798 ; CHECK-NEXT: vpshldd $31, %xmm1, %xmm0, %xmm0
799 ; CHECK-NEXT: ret{{[l|q]}}
801 %0 = bitcast <2 x i64> %__A to <4 x i32>
802 %1 = bitcast <2 x i64> %__B to <4 x i32>
803 %2 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> <i32 31, i32 31, i32 31, i32 31>)
804 %3 = bitcast <4 x i32> %2 to <2 x i64>
808 define <4 x i64> @test_mm256_mask_shldi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
809 ; X86-LABEL: test_mm256_mask_shldi_epi16:
810 ; X86: # %bb.0: # %entry
811 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
812 ; X86-NEXT: vpshldw $3, %ymm2, %ymm1, %ymm0 {%k1}
815 ; X64-LABEL: test_mm256_mask_shldi_epi16:
816 ; X64: # %bb.0: # %entry
817 ; X64-NEXT: kmovd %edi, %k1
818 ; X64-NEXT: vpshldw $3, %ymm2, %ymm1, %ymm0 {%k1}
821 %0 = bitcast <4 x i64> %__A to <16 x i16>
822 %1 = bitcast <4 x i64> %__B to <16 x i16>
823 %2 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
824 %3 = bitcast <4 x i64> %__S to <16 x i16>
825 %4 = bitcast i16 %__U to <16 x i1>
826 %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
827 %6 = bitcast <16 x i16> %5 to <4 x i64>
831 declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
833 define <4 x i64> @test_mm256_maskz_shldi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
834 ; X86-LABEL: test_mm256_maskz_shldi_epi16:
835 ; X86: # %bb.0: # %entry
836 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
837 ; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
840 ; X64-LABEL: test_mm256_maskz_shldi_epi16:
841 ; X64: # %bb.0: # %entry
842 ; X64-NEXT: kmovd %edi, %k1
843 ; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
846 %0 = bitcast <4 x i64> %__A to <16 x i16>
847 %1 = bitcast <4 x i64> %__B to <16 x i16>
848 %2 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
849 %3 = bitcast i16 %__U to <16 x i1>
850 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
851 %5 = bitcast <16 x i16> %4 to <4 x i64>
855 define <4 x i64> @test_mm256_shldi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
856 ; CHECK-LABEL: test_mm256_shldi_epi16:
857 ; CHECK: # %bb.0: # %entry
858 ; CHECK-NEXT: vpshldw $15, %ymm1, %ymm0, %ymm0
859 ; CHECK-NEXT: ret{{[l|q]}}
861 %0 = bitcast <4 x i64> %__A to <16 x i16>
862 %1 = bitcast <4 x i64> %__B to <16 x i16>
863 %2 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
864 %3 = bitcast <16 x i16> %2 to <4 x i64>
868 define <2 x i64> @test_mm_mask_shldi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
869 ; X86-LABEL: test_mm_mask_shldi_epi16:
870 ; X86: # %bb.0: # %entry
871 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
872 ; X86-NEXT: kmovd %eax, %k1
873 ; X86-NEXT: vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1}
876 ; X64-LABEL: test_mm_mask_shldi_epi16:
877 ; X64: # %bb.0: # %entry
878 ; X64-NEXT: kmovd %edi, %k1
879 ; X64-NEXT: vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1}
882 %0 = bitcast <2 x i64> %__A to <8 x i16>
883 %1 = bitcast <2 x i64> %__B to <8 x i16>
884 %2 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
885 %3 = bitcast <2 x i64> %__S to <8 x i16>
886 %4 = bitcast i8 %__U to <8 x i1>
887 %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
888 %6 = bitcast <8 x i16> %5 to <2 x i64>
892 declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
894 define <2 x i64> @test_mm_maskz_shldi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
895 ; X86-LABEL: test_mm_maskz_shldi_epi16:
896 ; X86: # %bb.0: # %entry
897 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
898 ; X86-NEXT: kmovd %eax, %k1
899 ; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
902 ; X64-LABEL: test_mm_maskz_shldi_epi16:
903 ; X64: # %bb.0: # %entry
904 ; X64-NEXT: kmovd %edi, %k1
905 ; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
908 %0 = bitcast <2 x i64> %__A to <8 x i16>
909 %1 = bitcast <2 x i64> %__B to <8 x i16>
910 %2 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
911 %3 = bitcast i8 %__U to <8 x i1>
912 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
913 %5 = bitcast <8 x i16> %4 to <2 x i64>
917 define <2 x i64> @test_mm_shldi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
918 ; CHECK-LABEL: test_mm_shldi_epi16:
919 ; CHECK: # %bb.0: # %entry
920 ; CHECK-NEXT: vpshldw $15, %xmm1, %xmm0, %xmm0
921 ; CHECK-NEXT: ret{{[l|q]}}
923 %0 = bitcast <2 x i64> %__A to <8 x i16>
924 %1 = bitcast <2 x i64> %__B to <8 x i16>
925 %2 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
926 %3 = bitcast <8 x i16> %2 to <2 x i64>
930 define <4 x i64> @test_mm256_mask_shrdi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
931 ; X86-LABEL: test_mm256_mask_shrdi_epi64:
932 ; X86: # %bb.0: # %entry
933 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
934 ; X86-NEXT: kmovd %eax, %k1
935 ; X86-NEXT: vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1}
938 ; X64-LABEL: test_mm256_mask_shrdi_epi64:
939 ; X64: # %bb.0: # %entry
940 ; X64-NEXT: kmovd %edi, %k1
941 ; X64-NEXT: vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1}
944 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__B, <4 x i64> %__A, <4 x i64> <i64 47, i64 47, i64 47, i64 47>)
945 %1 = bitcast i8 %__U to <8 x i1>
946 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
947 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
951 declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
953 define <4 x i64> @test_mm256_maskz_shrdi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
954 ; X86-LABEL: test_mm256_maskz_shrdi_epi64:
955 ; X86: # %bb.0: # %entry
956 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
957 ; X86-NEXT: kmovd %eax, %k1
958 ; X86-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
961 ; X64-LABEL: test_mm256_maskz_shrdi_epi64:
962 ; X64: # %bb.0: # %entry
963 ; X64-NEXT: kmovd %edi, %k1
964 ; X64-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
967 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__B, <4 x i64> %__A, <4 x i64> <i64 63, i64 63, i64 63, i64 63>)
968 %1 = bitcast i8 %__U to <8 x i1>
969 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
970 %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
974 define <4 x i64> @test_mm256_shrdi_epi64(<4 x i64> %__A, <4 x i64> %__B) {
975 ; CHECK-LABEL: test_mm256_shrdi_epi64:
976 ; CHECK: # %bb.0: # %entry
977 ; CHECK-NEXT: vpshrdq $31, %ymm1, %ymm0, %ymm0
978 ; CHECK-NEXT: ret{{[l|q]}}
980 %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__B, <4 x i64> %__A, <4 x i64> <i64 31, i64 31, i64 31, i64 31>)
984 define <2 x i64> @test_mm_mask_shrdi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
985 ; X86-LABEL: test_mm_mask_shrdi_epi64:
986 ; X86: # %bb.0: # %entry
987 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
988 ; X86-NEXT: kmovd %eax, %k1
989 ; X86-NEXT: vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1}
992 ; X64-LABEL: test_mm_mask_shrdi_epi64:
993 ; X64: # %bb.0: # %entry
994 ; X64-NEXT: kmovd %edi, %k1
995 ; X64-NEXT: vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1}
998 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__B, <2 x i64> %__A, <2 x i64> <i64 47, i64 47>)
999 %1 = bitcast i8 %__U to <8 x i1>
1000 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1001 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
1005 declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
1007 define <2 x i64> @test_mm_maskz_shrdi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1008 ; X86-LABEL: test_mm_maskz_shrdi_epi64:
1009 ; X86: # %bb.0: # %entry
1010 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1011 ; X86-NEXT: kmovd %eax, %k1
1012 ; X86-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1015 ; X64-LABEL: test_mm_maskz_shrdi_epi64:
1016 ; X64: # %bb.0: # %entry
1017 ; X64-NEXT: kmovd %edi, %k1
1018 ; X64-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
1021 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__B, <2 x i64> %__A, <2 x i64> <i64 63, i64 63>)
1022 %1 = bitcast i8 %__U to <8 x i1>
1023 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1024 %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
1028 define <2 x i64> @test_mm_shrdi_epi64(<2 x i64> %__A, <2 x i64> %__B) {
1029 ; CHECK-LABEL: test_mm_shrdi_epi64:
1030 ; CHECK: # %bb.0: # %entry
1031 ; CHECK-NEXT: vpshrdq $31, %xmm1, %xmm0, %xmm0
1032 ; CHECK-NEXT: ret{{[l|q]}}
1034 %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__B, <2 x i64> %__A, <2 x i64> <i64 31, i64 31>)
1038 define <4 x i64> @test_mm256_mask_shrdi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1039 ; X86-LABEL: test_mm256_mask_shrdi_epi32:
1040 ; X86: # %bb.0: # %entry
1041 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1042 ; X86-NEXT: kmovd %eax, %k1
1043 ; X86-NEXT: vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1}
1046 ; X64-LABEL: test_mm256_mask_shrdi_epi32:
1047 ; X64: # %bb.0: # %entry
1048 ; X64-NEXT: kmovd %edi, %k1
1049 ; X64-NEXT: vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1}
1052 %0 = bitcast <4 x i64> %__A to <8 x i32>
1053 %1 = bitcast <4 x i64> %__B to <8 x i32>
1054 %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
1055 %3 = bitcast <4 x i64> %__S to <8 x i32>
1056 %4 = bitcast i8 %__U to <8 x i1>
1057 %5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
1058 %6 = bitcast <8 x i32> %5 to <4 x i64>
1062 declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
1064 define <4 x i64> @test_mm256_maskz_shrdi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1065 ; X86-LABEL: test_mm256_maskz_shrdi_epi32:
1066 ; X86: # %bb.0: # %entry
1067 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1068 ; X86-NEXT: kmovd %eax, %k1
1069 ; X86-NEXT: vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
1072 ; X64-LABEL: test_mm256_maskz_shrdi_epi32:
1073 ; X64: # %bb.0: # %entry
1074 ; X64-NEXT: kmovd %edi, %k1
1075 ; X64-NEXT: vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
1078 %0 = bitcast <4 x i64> %__A to <8 x i32>
1079 %1 = bitcast <4 x i64> %__B to <8 x i32>
1080 %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
1081 %3 = bitcast i8 %__U to <8 x i1>
1082 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
1083 %5 = bitcast <8 x i32> %4 to <4 x i64>
1087 define <4 x i64> @test_mm256_shrdi_epi32(<4 x i64> %__A, <4 x i64> %__B) {
1088 ; CHECK-LABEL: test_mm256_shrdi_epi32:
1089 ; CHECK: # %bb.0: # %entry
1090 ; CHECK-NEXT: vpshrdd $31, %ymm1, %ymm0, %ymm0
1091 ; CHECK-NEXT: ret{{[l|q]}}
1093 %0 = bitcast <4 x i64> %__A to <8 x i32>
1094 %1 = bitcast <4 x i64> %__B to <8 x i32>
1095 %2 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
1096 %3 = bitcast <8 x i32> %2 to <4 x i64>
1100 define <2 x i64> @test_mm_mask_shrdi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1101 ; X86-LABEL: test_mm_mask_shrdi_epi32:
1102 ; X86: # %bb.0: # %entry
1103 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1104 ; X86-NEXT: kmovd %eax, %k1
1105 ; X86-NEXT: vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1}
1108 ; X64-LABEL: test_mm_mask_shrdi_epi32:
1109 ; X64: # %bb.0: # %entry
1110 ; X64-NEXT: kmovd %edi, %k1
1111 ; X64-NEXT: vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1}
1114 %0 = bitcast <2 x i64> %__A to <4 x i32>
1115 %1 = bitcast <2 x i64> %__B to <4 x i32>
1116 %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 7, i32 7, i32 7, i32 7>)
1117 %3 = bitcast <2 x i64> %__S to <4 x i32>
1118 %4 = bitcast i8 %__U to <8 x i1>
1119 %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1120 %5 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> %3
1121 %6 = bitcast <4 x i32> %5 to <2 x i64>
1125 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
1127 define <2 x i64> @test_mm_maskz_shrdi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1128 ; X86-LABEL: test_mm_maskz_shrdi_epi32:
1129 ; X86: # %bb.0: # %entry
1130 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1131 ; X86-NEXT: kmovd %eax, %k1
1132 ; X86-NEXT: vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
1135 ; X64-LABEL: test_mm_maskz_shrdi_epi32:
1136 ; X64: # %bb.0: # %entry
1137 ; X64-NEXT: kmovd %edi, %k1
1138 ; X64-NEXT: vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
1141 %0 = bitcast <2 x i64> %__A to <4 x i32>
1142 %1 = bitcast <2 x i64> %__B to <4 x i32>
1143 %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
1144 %3 = bitcast i8 %__U to <8 x i1>
1145 %extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1146 %4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
1147 %5 = bitcast <4 x i32> %4 to <2 x i64>
1151 define <2 x i64> @test_mm_shrdi_epi32(<2 x i64> %__A, <2 x i64> %__B) {
1152 ; CHECK-LABEL: test_mm_shrdi_epi32:
1153 ; CHECK: # %bb.0: # %entry
1154 ; CHECK-NEXT: vpshrdd $31, %xmm1, %xmm0, %xmm0
1155 ; CHECK-NEXT: ret{{[l|q]}}
1157 %0 = bitcast <2 x i64> %__A to <4 x i32>
1158 %1 = bitcast <2 x i64> %__B to <4 x i32>
1159 %2 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 31, i32 31, i32 31, i32 31>)
1160 %3 = bitcast <4 x i32> %2 to <2 x i64>
1164 define <4 x i64> @test_mm256_mask_shrdi_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1165 ; X86-LABEL: test_mm256_mask_shrdi_epi16:
1166 ; X86: # %bb.0: # %entry
1167 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1168 ; X86-NEXT: vpshrdw $3, %ymm2, %ymm1, %ymm0 {%k1}
1171 ; X64-LABEL: test_mm256_mask_shrdi_epi16:
1172 ; X64: # %bb.0: # %entry
1173 ; X64-NEXT: kmovd %edi, %k1
1174 ; X64-NEXT: vpshrdw $3, %ymm2, %ymm1, %ymm0 {%k1}
1177 %0 = bitcast <4 x i64> %__A to <16 x i16>
1178 %1 = bitcast <4 x i64> %__B to <16 x i16>
1179 %2 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
1180 %3 = bitcast <4 x i64> %__S to <16 x i16>
1181 %4 = bitcast i16 %__U to <16 x i1>
1182 %5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
1183 %6 = bitcast <16 x i16> %5 to <4 x i64>
1187 declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
1189 define <4 x i64> @test_mm256_maskz_shrdi_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1190 ; X86-LABEL: test_mm256_maskz_shrdi_epi16:
1191 ; X86: # %bb.0: # %entry
1192 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1193 ; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
1196 ; X64-LABEL: test_mm256_maskz_shrdi_epi16:
1197 ; X64: # %bb.0: # %entry
1198 ; X64-NEXT: kmovd %edi, %k1
1199 ; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
1202 %0 = bitcast <4 x i64> %__A to <16 x i16>
1203 %1 = bitcast <4 x i64> %__B to <16 x i16>
1204 %2 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
1205 %3 = bitcast i16 %__U to <16 x i1>
1206 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
1207 %5 = bitcast <16 x i16> %4 to <4 x i64>
1211 define <4 x i64> @test_mm256_shrdi_epi16(<4 x i64> %__A, <4 x i64> %__B) {
1212 ; CHECK-LABEL: test_mm256_shrdi_epi16:
1213 ; CHECK: # %bb.0: # %entry
1214 ; CHECK-NEXT: vpshrdw $15, %ymm1, %ymm0, %ymm0
1215 ; CHECK-NEXT: ret{{[l|q]}}
1217 %0 = bitcast <4 x i64> %__A to <16 x i16>
1218 %1 = bitcast <4 x i64> %__B to <16 x i16>
1219 %2 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
1220 %3 = bitcast <16 x i16> %2 to <4 x i64>
1224 define <2 x i64> @test_mm_mask_shrdi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1225 ; X86-LABEL: test_mm_mask_shrdi_epi16:
1226 ; X86: # %bb.0: # %entry
1227 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1228 ; X86-NEXT: kmovd %eax, %k1
1229 ; X86-NEXT: vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1}
1232 ; X64-LABEL: test_mm_mask_shrdi_epi16:
1233 ; X64: # %bb.0: # %entry
1234 ; X64-NEXT: kmovd %edi, %k1
1235 ; X64-NEXT: vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1}
1238 %0 = bitcast <2 x i64> %__A to <8 x i16>
1239 %1 = bitcast <2 x i64> %__B to <8 x i16>
1240 %2 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
1241 %3 = bitcast <2 x i64> %__S to <8 x i16>
1242 %4 = bitcast i8 %__U to <8 x i1>
1243 %5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
1244 %6 = bitcast <8 x i16> %5 to <2 x i64>
1248 declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
1250 define <2 x i64> @test_mm_maskz_shrdi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1251 ; X86-LABEL: test_mm_maskz_shrdi_epi16:
1252 ; X86: # %bb.0: # %entry
1253 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1254 ; X86-NEXT: kmovd %eax, %k1
1255 ; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
1258 ; X64-LABEL: test_mm_maskz_shrdi_epi16:
1259 ; X64: # %bb.0: # %entry
1260 ; X64-NEXT: kmovd %edi, %k1
1261 ; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
1264 %0 = bitcast <2 x i64> %__A to <8 x i16>
1265 %1 = bitcast <2 x i64> %__B to <8 x i16>
1266 %2 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
1267 %3 = bitcast i8 %__U to <8 x i1>
1268 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
1269 %5 = bitcast <8 x i16> %4 to <2 x i64>
1273 define <2 x i64> @test_mm_shrdi_epi16(<2 x i64> %__A, <2 x i64> %__B) {
1274 ; CHECK-LABEL: test_mm_shrdi_epi16:
1275 ; CHECK: # %bb.0: # %entry
1276 ; CHECK-NEXT: vpshrdw $15, %xmm1, %xmm0, %xmm0
1277 ; CHECK-NEXT: ret{{[l|q]}}
1279 %0 = bitcast <2 x i64> %__A to <8 x i16>
1280 %1 = bitcast <2 x i64> %__B to <8 x i16>
1281 %2 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
1282 %3 = bitcast <8 x i16> %2 to <2 x i64>
1286 define <4 x i64> @test_mm256_mask_shldv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1287 ; X86-LABEL: test_mm256_mask_shldv_epi64:
1288 ; X86: # %bb.0: # %entry
1289 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1290 ; X86-NEXT: kmovd %eax, %k1
1291 ; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
1294 ; X64-LABEL: test_mm256_mask_shldv_epi64:
1295 ; X64: # %bb.0: # %entry
1296 ; X64-NEXT: kmovd %edi, %k1
1297 ; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
1300 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B)
1301 %1 = bitcast i8 %__U to <8 x i1>
1302 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1303 %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__S
1307 define <4 x i64> @test_mm256_maskz_shldv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1308 ; X86-LABEL: test_mm256_maskz_shldv_epi64:
1309 ; X86: # %bb.0: # %entry
1310 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1311 ; X86-NEXT: kmovd %eax, %k1
1312 ; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
1315 ; X64-LABEL: test_mm256_maskz_shldv_epi64:
1316 ; X64: # %bb.0: # %entry
1317 ; X64-NEXT: kmovd %edi, %k1
1318 ; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
1321 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B)
1322 %1 = bitcast i8 %__U to <8 x i1>
1323 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1324 %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
1328 define <4 x i64> @test_mm256_shldv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1329 ; CHECK-LABEL: test_mm256_shldv_epi64:
1330 ; CHECK: # %bb.0: # %entry
1331 ; CHECK-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
1332 ; CHECK-NEXT: ret{{[l|q]}}
1334 %0 = tail call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B)
1338 define <2 x i64> @test_mm_mask_shldv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1339 ; X86-LABEL: test_mm_mask_shldv_epi64:
1340 ; X86: # %bb.0: # %entry
1341 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1342 ; X86-NEXT: kmovd %eax, %k1
1343 ; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
1346 ; X64-LABEL: test_mm_mask_shldv_epi64:
1347 ; X64: # %bb.0: # %entry
1348 ; X64-NEXT: kmovd %edi, %k1
1349 ; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
1352 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B)
1353 %1 = bitcast i8 %__U to <8 x i1>
1354 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1355 %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__S
1359 define <2 x i64> @test_mm_maskz_shldv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1360 ; X86-LABEL: test_mm_maskz_shldv_epi64:
1361 ; X86: # %bb.0: # %entry
1362 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1363 ; X86-NEXT: kmovd %eax, %k1
1364 ; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
1367 ; X64-LABEL: test_mm_maskz_shldv_epi64:
1368 ; X64: # %bb.0: # %entry
1369 ; X64-NEXT: kmovd %edi, %k1
1370 ; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
1373 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B)
1374 %1 = bitcast i8 %__U to <8 x i1>
1375 %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
1376 %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
1380 define <2 x i64> @test_mm_shldv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1381 ; CHECK-LABEL: test_mm_shldv_epi64:
1382 ; CHECK: # %bb.0: # %entry
1383 ; CHECK-NEXT: vpshldvq %xmm2, %xmm1, %xmm0
1384 ; CHECK-NEXT: ret{{[l|q]}}
1386 %0 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B)
1390 define <4 x i64> @test_mm256_mask_shldv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1391 ; X86-LABEL: test_mm256_mask_shldv_epi32:
1392 ; X86: # %bb.0: # %entry
1393 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1394 ; X86-NEXT: kmovd %eax, %k1
1395 ; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
1398 ; X64-LABEL: test_mm256_mask_shldv_epi32:
1399 ; X64: # %bb.0: # %entry
1400 ; X64-NEXT: kmovd %edi, %k1
1401 ; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
1404 %0 = bitcast <4 x i64> %__S to <8 x i32>
1405 %1 = bitcast <4 x i64> %__A to <8 x i32>
1406 %2 = bitcast <4 x i64> %__B to <8 x i32>
1407 %3 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2)
1408 %4 = bitcast i8 %__U to <8 x i1>
1409 %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0
1410 %6 = bitcast <8 x i32> %5 to <4 x i64>
1414 define <4 x i64> @test_mm256_maskz_shldv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1415 ; X86-LABEL: test_mm256_maskz_shldv_epi32:
1416 ; X86: # %bb.0: # %entry
1417 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1418 ; X86-NEXT: kmovd %eax, %k1
1419 ; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
1422 ; X64-LABEL: test_mm256_maskz_shldv_epi32:
1423 ; X64: # %bb.0: # %entry
1424 ; X64-NEXT: kmovd %edi, %k1
1425 ; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
1428 %0 = bitcast <4 x i64> %__S to <8 x i32>
1429 %1 = bitcast <4 x i64> %__A to <8 x i32>
1430 %2 = bitcast <4 x i64> %__B to <8 x i32>
1431 %3 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2)
1432 %4 = bitcast i8 %__U to <8 x i1>
1433 %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer
1434 %6 = bitcast <8 x i32> %5 to <4 x i64>
1438 define <4 x i64> @test_mm256_shldv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1439 ; CHECK-LABEL: test_mm256_shldv_epi32:
1440 ; CHECK: # %bb.0: # %entry
1441 ; CHECK-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
1442 ; CHECK-NEXT: ret{{[l|q]}}
1444 %0 = bitcast <4 x i64> %__S to <8 x i32>
1445 %1 = bitcast <4 x i64> %__A to <8 x i32>
1446 %2 = bitcast <4 x i64> %__B to <8 x i32>
1447 %3 = tail call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2)
1448 %4 = bitcast <8 x i32> %3 to <4 x i64>
1452 define <2 x i64> @test_mm_mask_shldv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
1453 ; X86-LABEL: test_mm_mask_shldv_epi32:
1454 ; X86: # %bb.0: # %entry
1455 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1456 ; X86-NEXT: kmovd %eax, %k1
1457 ; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
1460 ; X64-LABEL: test_mm_mask_shldv_epi32:
1461 ; X64: # %bb.0: # %entry
1462 ; X64-NEXT: kmovd %edi, %k1
1463 ; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
1466 %0 = bitcast <2 x i64> %__S to <4 x i32>
1467 %1 = bitcast <2 x i64> %__A to <4 x i32>
1468 %2 = bitcast <2 x i64> %__B to <4 x i32>
1469 %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
1470 %4 = bitcast i8 %__U to <8 x i1>
1471 %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1472 %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %0
1473 %6 = bitcast <4 x i32> %5 to <2 x i64>
1477 define <2 x i64> @test_mm_maskz_shldv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1478 ; X86-LABEL: test_mm_maskz_shldv_epi32:
1479 ; X86: # %bb.0: # %entry
1480 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1481 ; X86-NEXT: kmovd %eax, %k1
1482 ; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
1485 ; X64-LABEL: test_mm_maskz_shldv_epi32:
1486 ; X64: # %bb.0: # %entry
1487 ; X64-NEXT: kmovd %edi, %k1
1488 ; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
1491 %0 = bitcast <2 x i64> %__S to <4 x i32>
1492 %1 = bitcast <2 x i64> %__A to <4 x i32>
1493 %2 = bitcast <2 x i64> %__B to <4 x i32>
1494 %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
1495 %4 = bitcast i8 %__U to <8 x i1>
1496 %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1497 %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer
1498 %6 = bitcast <4 x i32> %5 to <2 x i64>
1502 define <2 x i64> @test_mm_shldv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
1503 ; CHECK-LABEL: test_mm_shldv_epi32:
1504 ; CHECK: # %bb.0: # %entry
1505 ; CHECK-NEXT: vpshldvd %xmm2, %xmm1, %xmm0
1506 ; CHECK-NEXT: ret{{[l|q]}}
1508 %0 = bitcast <2 x i64> %__S to <4 x i32>
1509 %1 = bitcast <2 x i64> %__A to <4 x i32>
1510 %2 = bitcast <2 x i64> %__B to <4 x i32>
1511 %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
1512 %4 = bitcast <4 x i32> %3 to <2 x i64>
1516 define <4 x i64> @test_mm256_mask_shldv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
1517 ; X86-LABEL: test_mm256_mask_shldv_epi16:
1518 ; X86: # %bb.0: # %entry
1519 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1520 ; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
1523 ; X64-LABEL: test_mm256_mask_shldv_epi16:
1524 ; X64: # %bb.0: # %entry
1525 ; X64-NEXT: kmovd %edi, %k1
1526 ; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1}
1529 %0 = bitcast <4 x i64> %__S to <16 x i16>
1530 %1 = bitcast <4 x i64> %__A to <16 x i16>
1531 %2 = bitcast <4 x i64> %__B to <16 x i16>
1532 %3 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
1533 %4 = bitcast i16 %__U to <16 x i1>
1534 %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0
1535 %6 = bitcast <16 x i16> %5 to <4 x i64>
1539 define <4 x i64> @test_mm256_maskz_shldv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
1540 ; X86-LABEL: test_mm256_maskz_shldv_epi16:
1541 ; X86: # %bb.0: # %entry
1542 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1543 ; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
1546 ; X64-LABEL: test_mm256_maskz_shldv_epi16:
1547 ; X64: # %bb.0: # %entry
1548 ; X64-NEXT: kmovd %edi, %k1
1549 ; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
1552 %0 = bitcast <4 x i64> %__S to <16 x i16>
1553 %1 = bitcast <4 x i64> %__A to <16 x i16>
1554 %2 = bitcast <4 x i64> %__B to <16 x i16>
1555 %3 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
1556 %4 = bitcast i16 %__U to <16 x i1>
1557 %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer
1558 %6 = bitcast <16 x i16> %5 to <4 x i64>
define <4 x i64> @test_mm256_shldv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shldv_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shldv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_shldv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shldv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_shldv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shldv_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshldvw %xmm2, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <4 x i64> @test_mm256_mask_shrdv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__S, <4 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__S
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_maskz_shrdv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shrdv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__S, <4 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_shrdv_epi64(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdv_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %__A, <4 x i64> %__S, <4 x i64> %__B)
  ret <4 x i64> %0
}

define <2 x i64> @test_mm_mask_shrdv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shrdv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__S
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_maskz_shrdv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdv_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shrdv_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_shrdv_epi64(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdv_epi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %__A, <2 x i64> %__S, <2 x i64> %__B)
  ret <2 x i64> %0
}

define <4 x i64> @test_mm256_mask_shrdv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdv_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdv_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0
  %6 = bitcast <8 x i32> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_shrdv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdv_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shrdv_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer
  %6 = bitcast <8 x i32> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_shrdv_epi32(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdv_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <8 x i32>
  %1 = bitcast <4 x i64> %__A to <8 x i32>
  %2 = bitcast <4 x i64> %__B to <8 x i32>
  %3 = tail call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %1, <8 x i32> %0, <8 x i32> %2)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shrdv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdv_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shrdv_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> %0
  %6 = bitcast <4 x i32> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_shrdv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdv_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shrdv_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = select <4 x i1> %extract.i, <4 x i32> %3, <4 x i32> zeroinitializer
  %6 = bitcast <4 x i32> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_shrdv_epi32(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdv_epi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <4 x i32>
  %1 = bitcast <2 x i64> %__A to <4 x i32>
  %2 = bitcast <2 x i64> %__B to <4 x i32>
  %3 = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %2)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <4 x i64> @test_mm256_mask_shrdv_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_shrdv_epi16(i16 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shrdv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_shrdv_epi16(<4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_shrdv_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = bitcast <4 x i64> %__A to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %1, <16 x i16> %0, <16 x i16> %2)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <2 x i64> @test_mm_mask_shrdv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shrdv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_shrdv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdv_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shrdv_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_shrdv_epi16(<2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_shrdv_epi16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = bitcast <2 x i64> %__A to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %1, <8 x i16> %0, <8 x i16> %2)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8)
declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16)
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8)
declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16)
declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>)
declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16)
declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32)
declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16)
declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32)
declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>)
declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>)