; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
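; This file covers the llvm.x86.avx512.mask.* expand/compress load/store intrinsics
; and the vpshld/vpshrd funnel-shift intrinsics on 128-bit and 256-bit vectors,
; checking both instruction selection and MC encodings for the X86 and X64 targets.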

define <8 x i16> @test_mask_expand_load_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> %data, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_maskz_expand_load_w_128(ptr %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> %data, i8 %mask)

define <8 x i16> @test_expand_load_w_128(ptr %addr, <8 x i16> %data) {
; X86-LABEL: test_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(ptr %addr, <8 x i16> %data, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_expand_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)

define <16 x i8> @test_mask_expand_load_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> %data, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_maskz_expand_load_b_128(ptr %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> %data, i16 %mask)

define <16 x i8> @test_expand_load_b_128(ptr %addr, <16 x i8> %data) {
; X86-LABEL: test_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(ptr %addr, <16 x i8> %data, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_expand_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)

define void @test_mask_compress_store_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.128(ptr %addr, <8 x i16> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.128(ptr %addr, <8 x i16> %data, i8 %mask)

define void @test_compress_store_w_128(ptr %addr, <8 x i16> %data) {
; X86-LABEL: test_compress_store_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.128(ptr %addr, <8 x i16> %data, i8 -1)
  ret void
}

define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_compress_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)

define void @test_mask_compress_store_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.128(ptr %addr, <16 x i8> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.128(ptr %addr, <16 x i8> %data, i16 %mask)

define void @test_compress_store_b_128(ptr %addr, <16 x i8> %data) {
; X86-LABEL: test_compress_store_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.128(ptr %addr, <16 x i8> %data, i16 -1)
  ret void
}

define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_compress_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)

define <16 x i16> @test_mask_expand_load_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> %data, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_maskz_expand_load_w_256(ptr %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> %data, i16 %mask)

define <16 x i16> @test_expand_load_w_256(ptr %addr, <16 x i16> %data) {
; X86-LABEL: test_expand_load_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(ptr %addr, <16 x i16> %data, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_expand_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)

define <32 x i8> @test_mask_expand_load_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> %data, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_maskz_expand_load_b_256(ptr %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> %data, i32 %mask)

define <32 x i8> @test_expand_load_b_256(ptr %addr, <32 x i8> %data) {
; X86-LABEL: test_expand_load_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_256:
; X64: # %bb.0:
; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(ptr %addr, <32 x i8> %data, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_expand_b_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)

define void @test_mask_compress_store_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.256(ptr %addr, <16 x i16> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.256(ptr %addr, <16 x i16> %data, i16 %mask)

define void @test_compress_store_w_256(ptr %addr, <16 x i16> %data) {
; X86-LABEL: test_compress_store_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.256(ptr %addr, <16 x i16> %data, i16 -1)
  ret void
}

define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_compress_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)

define void @test_mask_compress_store_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.256(ptr %addr, <32 x i8> %data, i32 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.256(ptr %addr, <32 x i8> %data, i32 %mask)

define void @test_compress_store_b_256(ptr %addr, <32 x i8> %data) {
; X86-LABEL: test_compress_store_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_256:
; X64: # %bb.0:
; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.256(ptr %addr, <32 x i8> %data, i32 -1)
  ret void
}

define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_compress_b_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)

define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24, <4 x i32> zeroinitializer,i8 %x4)
  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %res1, 1
  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %res2, 2
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
}

declare <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

define { <8 x i32>, <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xe1,0x16]
; X86-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x17]
; X86-NEXT: vpshldd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x71,0xd1,0x18]
; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xe1,0x16]
; X64-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x17]
; X64-NEXT: vpshldd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x71,0xd1,0x18]
; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23, <8 x i32> %x3, i8 -1)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 24, <8 x i32> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0
  %res4 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } %res3, <8 x i32> %res1, 1
  %res5 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } %res4, <8 x i32> %res2, 2
  ret { <8 x i32>, <8 x i32>, <8 x i32> } %res5
}

declare <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define { <2 x i64>, <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xe1,0x16]
; X86-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x17]
; X86-NEXT: vpshldq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x71,0xd1,0x18]
; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xe1,0x16]
; X64-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x17]
; X64-NEXT: vpshldq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x71,0xd1,0x18]
; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23, <2 x i64> %x3, i8 -1)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 24, <2 x i64> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0
  %res4 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } %res3, <2 x i64> %res1, 1
  %res5 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } %res4, <2 x i64> %res2, 2
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %res5
}

declare <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

define { <4 x i64>, <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xe1,0x16]
; X86-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x17]
; X86-NEXT: vpshldq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x71,0xd1,0x18]
; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xe1,0x16]
; X64-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x17]
; X64-NEXT: vpshldq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x71,0xd1,0x18]
; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23, <4 x i64> %x3, i8 -1)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 24, <4 x i64> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0
  %res4 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } %res3, <4 x i64> %res1, 1
  %res5 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } %res4, <4 x i64> %res2, 2
  ret { <4 x i64>, <4 x i64>, <4 x i64> } %res5
}

declare <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xe1,0x06]
; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x07]
; X86-NEXT: vpshldw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x70,0xd1,0x08]
; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xe1,0x06]
; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x07]
; X64-NEXT: vpshldw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x70,0xd1,0x08]
; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7, <8 x i16> %x3, i8 -1)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 8, <8 x i16> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
  %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
  %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}

declare <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)

define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xe1,0x06]
; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x07]
; X86-NEXT: vpshldw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x70,0xd1,0x08]
; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xe1,0x06]
; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x07]
; X64-NEXT: vpshldw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x70,0xd1,0x08]
; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7, <16 x i16> %x3, i16 -1)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 8, <16 x i16> zeroinitializer, i16 %x4)
  %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
  %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
  %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
  ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}

declare <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)

define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24, <4 x i32> zeroinitializer,i8 %x4)
  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %res1, 1
  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %res2, 2
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
}

declare <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

define { <8 x i32>, <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xe1,0x16]
; X86-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x17]
; X86-NEXT: vpshrdd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x73,0xd1,0x18]
; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xe1,0x16]
; X64-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x17]
; X64-NEXT: vpshrdd $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x73,0xd1,0x18]
; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23, <8 x i32> %x3, i8 -1)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 24, <8 x i32> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0
  %res4 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } %res3, <8 x i32> %res1, 1
  %res5 = insertvalue { <8 x i32>, <8 x i32>, <8 x i32> } %res4, <8 x i32> %res2, 2
  ret { <8 x i32>, <8 x i32>, <8 x i32> } %res5
}

declare <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define { <2 x i64>, <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xe1,0x16]
; X86-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x17]
; X86-NEXT: vpshrdq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x73,0xd1,0x18]
; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xe1,0x16]
; X64-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x17]
; X64-NEXT: vpshrdq $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x73,0xd1,0x18]
; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23, <2 x i64> %x3, i8 -1)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 24, <2 x i64> zeroinitializer, i8 %x4)
  %res3 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0
  %res4 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } %res3, <2 x i64> %res1, 1
  %res5 = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } %res4, <2 x i64> %res2, 2
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %res5
}

declare <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

1024 define { <4 x i64>, <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
1025 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
1027 ; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
1028 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1029 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1030 ; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xe1,0x16]
1031 ; X86-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x17]
1032 ; X86-NEXT: vpshrdq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x73,0xd1,0x18]
1033 ; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
1034 ; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1035 ; X86-NEXT: retl # encoding: [0xc3]
1037 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
1039 ; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
1040 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1041 ; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xe1,0x16]
1042 ; X64-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x17]
1043 ; X64-NEXT: vpshrdq $24, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x73,0xd1,0x18]
1044 ; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
1045 ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1046 ; X64-NEXT: retq # encoding: [0xc3]
1047 %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
1048 %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23, <4 x i64> %x3, i8 -1)
1049 %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 24, <4 x i64> zeroinitializer, i8 %x4)
1050 %res3 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0
1051 %res4 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } %res3, <4 x i64> %res1, 1
1052 %res5 = insertvalue { <4 x i64>, <4 x i64>, <4 x i64> } %res4, <4 x i64> %res2, 2
1053 ret { <4 x i64>, <4 x i64>, <4 x i64> } %res5
}
1055 declare <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
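; The word-granularity variants below use shift counts 6, 7 and 8 (in range
; for 16-bit lanes) instead of 22/23/24. Note the mask handling on the X86
; side: the 128-bit test takes an i8 mask (movzbl + kmovd), while the 256-bit
; test needs an i16 mask and loads it straight into %k1 with kmovw.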
1057 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
1058 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
1060 ; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1061 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1062 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1063 ; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xe1,0x06]
1064 ; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x07]
1065 ; X86-NEXT: vpshrdw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x72,0xd1,0x08]
1066 ; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1067 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1068 ; X86-NEXT: retl # encoding: [0xc3]
1070 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
1072 ; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1073 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1074 ; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xe1,0x06]
1075 ; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x07]
1076 ; X64-NEXT: vpshrdw $8, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x72,0xd1,0x08]
1077 ; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1078 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1079 ; X64-NEXT: retq # encoding: [0xc3]
1080 %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
1081 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7, <8 x i16> %x3, i8 -1)
1082 %res2 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 8, <8 x i16> zeroinitializer, i8 %x4)
1083 %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
1084 %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1
1085 %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2
1086 ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5
}
1088 declare <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)
1090 define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
1091 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
1093 ; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
1094 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1095 ; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xe1,0x06]
1096 ; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x07]
1097 ; X86-NEXT: vpshrdw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x72,0xd1,0x08]
1098 ; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
1099 ; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1100 ; X86-NEXT: retl # encoding: [0xc3]
1102 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
1104 ; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
1105 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1106 ; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xe1,0x06]
1107 ; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x07]
1108 ; X64-NEXT: vpshrdw $8, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x72,0xd1,0x08]
1109 ; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4]
1110 ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1111 ; X64-NEXT: retq # encoding: [0xc3]
1112 %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
1113 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7, <16 x i16> %x3, i16 -1)
1114 %res2 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 8, <16 x i16> zeroinitializer, i16 %x4)
1115 %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
1116 %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1
1117 %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2
1118 ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5
}
1120 declare <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)
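; The *_2 tests below exercise the unmasked @llvm.x86.avx512.vpshld.* /
; @llvm.x86.avx512.vpshrd.* intrinsics. Masking is expressed in plain IR
; instead of an intrinsic operand: the i8/i16 mask is bitcast to a vector of
; i1, narrowed with a shufflevector where fewer lanes are needed, and applied
; with a select against either the passthru vector or zeroinitializer. The
; checks verify that the backend folds that select back into {%k1} (merge) or
; {%k1} {z} (zero) on the shift instruction itself.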
1122 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128_2(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
1123 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2:
1125 ; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1126 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1127 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1128 ; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
1129 ; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
1130 ; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
1131 ; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1132 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1133 ; X86-NEXT: retl # encoding: [0xc3]
1135 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2:
1137 ; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1138 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1139 ; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
1140 ; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
1141 ; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
1142 ; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1143 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1144 ; X64-NEXT: retq # encoding: [0xc3]
1145 %1 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1146 %2 = bitcast i8 %x4 to <8 x i1>
1147 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1148 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
1149 %4 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23)
1150 %5 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24)
1151 %6 = bitcast i8 %x4 to <8 x i1>
1152 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1153 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
1154 %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
1155 %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1
1156 %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2
1157 ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
}
1159 declare <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32>, <4 x i32>, i32)
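; As a rough illustration of the dword arithmetic exercised above (assumed
; lane values, not checked output): with a = 0x00000001 from the first source
; and b = 0x80000000 from the second, vpshldd with immediate 22 conceptually
; returns the high 32 bits of the 64-bit concatenation (a:b) shifted left by
; 22, i.e. (a << 22) | (b >> 10) = 0x00400000 | 0x00200000 = 0x00600000 per
; lane.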
1161 define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
1162 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2:
1164 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1165 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1166 ; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
1167 ; X86-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
1168 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1169 ; X86-NEXT: retl # encoding: [0xc3]
1171 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2:
1173 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1174 ; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
1175 ; X64-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
1176 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1177 ; X64-NEXT: retq # encoding: [0xc3]
1178 %1 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22)
1179 %2 = bitcast i8 %x4 to <8 x i1>
1180 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
1181 %4 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23)
1182 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
1183 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %4, 1
1184 ret { <8 x i32>, <8 x i32> } %res3
}
1186 declare <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32>, <8 x i32>, i32)
1188 define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
1189 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2:
1191 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1192 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1193 ; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
1194 ; X86-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
1195 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1196 ; X86-NEXT: retl # encoding: [0xc3]
1198 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2:
1200 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1201 ; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
1202 ; X64-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
1203 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1204 ; X64-NEXT: retq # encoding: [0xc3]
1205 %1 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22)
1206 %2 = bitcast i8 %x4 to <8 x i1>
1207 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1208 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
1209 %4 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23)
1210 %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
1211 %res3 = insertvalue { <2 x i64>, <2 x i64> } %res2, <2 x i64> %4, 1
1212 ret { <2 x i64>, <2 x i64> } %res3
}
1214 declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32)
1216 define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
1217 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2:
1219 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1220 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1221 ; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
1222 ; X86-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
1223 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1224 ; X86-NEXT: retl # encoding: [0xc3]
1226 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2:
1228 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1229 ; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
1230 ; X64-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
1231 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1232 ; X64-NEXT: retq # encoding: [0xc3]
1233 %1 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22)
1234 %2 = bitcast i8 %x4 to <8 x i1>
1235 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1236 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
1237 %4 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23)
1238 %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
1239 %res3 = insertvalue { <4 x i64>, <4 x i64> } %res2, <4 x i64> %4, 1
1240 ret { <4 x i64>, <4 x i64> } %res3
}
1242 declare <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64>, <4 x i64>, i32)
1244 define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
1245 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128_2:
1247 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1248 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1249 ; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
1250 ; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
1251 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1252 ; X86-NEXT: retl # encoding: [0xc3]
1254 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128_2:
1256 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1257 ; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
1258 ; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
1259 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1260 ; X64-NEXT: retq # encoding: [0xc3]
1261 %1 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
1262 %2 = bitcast i8 %x4 to <8 x i1>
1263 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
1264 %4 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7)
1265 %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
1266 %res3 = insertvalue { <8 x i16>, <8 x i16> } %res2, <8 x i16> %4, 1
1267 ret { <8 x i16>, <8 x i16> } %res3
}
1269 declare <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16>, <8 x i16>, i32)
1271 define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
1272 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2:
1274 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1275 ; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
1276 ; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
1277 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1278 ; X86-NEXT: retl # encoding: [0xc3]
1280 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2:
1282 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1283 ; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
1284 ; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
1285 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1286 ; X64-NEXT: retq # encoding: [0xc3]
1287 %1 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
1288 %2 = bitcast i16 %x4 to <16 x i1>
1289 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
1290 %4 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7)
1291 %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
1292 %res3 = insertvalue { <16 x i16>, <16 x i16> } %res2, <16 x i16> %4, 1
1293 ret { <16 x i16>, <16 x i16> } %res3
}
1295 declare <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16>, <16 x i16>, i32)
1297 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128_2(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
1298 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2:
1300 ; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1301 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1302 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1303 ; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
1304 ; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
1305 ; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
1306 ; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1307 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1308 ; X86-NEXT: retl # encoding: [0xc3]
1310 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2:
1312 ; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
1313 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1314 ; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
1315 ; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
1316 ; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
1317 ; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
1318 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1319 ; X64-NEXT: retq # encoding: [0xc3]
1320 %1 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
1321 %2 = bitcast i8 %x4 to <8 x i1>
1322 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1323 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
1324 %4 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23)
1325 %5 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24)
1326 %6 = bitcast i8 %x4 to <8 x i1>
1327 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1328 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
1329 %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
1330 %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1
1331 %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2
1332 ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
}
1334 declare <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32>, <4 x i32>, i32)
1336 define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
1337 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2:
1339 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1340 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1341 ; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
1342 ; X86-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
1343 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1344 ; X86-NEXT: retl # encoding: [0xc3]
1346 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2:
1348 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1349 ; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
1350 ; X64-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
1351 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1352 ; X64-NEXT: retq # encoding: [0xc3]
1353 %1 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22)
1354 %2 = bitcast i8 %x4 to <8 x i1>
1355 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
1356 %4 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23)
1357 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
1358 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %4, 1
1359 ret { <8 x i32>, <8 x i32> } %res3
}
1361 declare <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32>, <8 x i32>, i32)
1363 define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
1364 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2:
1366 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1367 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1368 ; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
1369 ; X86-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
1370 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1371 ; X86-NEXT: retl # encoding: [0xc3]
1373 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2:
1375 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1376 ; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
1377 ; X64-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
1378 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1379 ; X64-NEXT: retq # encoding: [0xc3]
1380 %1 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22)
1381 %2 = bitcast i8 %x4 to <8 x i1>
1382 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
1383 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
1384 %4 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23)
1385 %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
1386 %res3 = insertvalue { <2 x i64>, <2 x i64> } %res2, <2 x i64> %4, 1
1387 ret { <2 x i64>, <2 x i64> } %res3
}
1389 declare <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64>, <2 x i64>, i32)
1391 define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
1392 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2:
1394 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1395 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1396 ; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
1397 ; X86-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
1398 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1399 ; X86-NEXT: retl # encoding: [0xc3]
1401 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2:
1403 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1404 ; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
1405 ; X64-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
1406 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1407 ; X64-NEXT: retq # encoding: [0xc3]
1408 %1 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22)
1409 %2 = bitcast i8 %x4 to <8 x i1>
1410 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1411 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
1412 %4 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23)
1413 %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
1414 %res3 = insertvalue { <4 x i64>, <4 x i64> } %res2, <4 x i64> %4, 1
1415 ret { <4 x i64>, <4 x i64> } %res3
}
1417 declare <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64>, <4 x i64>, i32)
1419 define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
1420 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128_2:
1422 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1423 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1424 ; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
1425 ; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
1426 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1427 ; X86-NEXT: retl # encoding: [0xc3]
1429 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128_2:
1431 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1432 ; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
1433 ; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
1434 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1435 ; X64-NEXT: retq # encoding: [0xc3]
1436 %1 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
1437 %2 = bitcast i8 %x4 to <8 x i1>
1438 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
1439 %4 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7)
1440 %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
1441 %res3 = insertvalue { <8 x i16>, <8 x i16> } %res2, <8 x i16> %4, 1
1442 ret { <8 x i16>, <8 x i16> } %res3
}
1444 declare <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16>, <8 x i16>, i32)
1446 define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
1447 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2:
1449 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1450 ; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
1451 ; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
1452 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1453 ; X86-NEXT: retl # encoding: [0xc3]
1455 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2:
1457 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1458 ; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
1459 ; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
1460 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1461 ; X64-NEXT: retq # encoding: [0xc3]
1462 %1 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
1463 %2 = bitcast i16 %x4 to <16 x i1>
1464 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
1465 %4 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7)
1466 %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
1467 %res3 = insertvalue { <16 x i16>, <16 x i16> } %res2, <16 x i16> %4, 1
1468 ret { <16 x i16>, <16 x i16> } %res3
}
1470 declare <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16>, <16 x i16>, i32)
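; From here on the variable-count forms are tested. VPSHRDV*/VPSHLDV* take the
; per-element shift counts from the third operand, and the instruction reads
; and writes its destination register (the first operand), which is why the
; masked tests copy %x0 aside before producing two independent results. Each
; width has a pair of intrinsics: .mask. merges into the destination under the
; mask and .maskz. zeroes the masked-off lanes. The all-ones-mask tests share
; a single CHECK prefix because the X86 and X64 output differs only in the
; ret, and one operand is loaded from %x2p to cover the folded memory form.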
1472 declare <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1473 declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1475 define <8 x i32>@test_int_x86_avx512_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
1476 ; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_256:
1478 ; CHECK-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xc2]
1479 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1480 %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
ret <8 x i32> %res
}
1484 define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
1485 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
1487 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1488 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1489 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1490 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1491 ; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x00]
1492 ; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
1493 ; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1494 ; X86-NEXT: retl # encoding: [0xc3]
1496 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
1498 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1499 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1500 ; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x07]
1501 ; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
1502 ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1503 ; X64-NEXT: retq # encoding: [0xc3]
1504 %x2 = load <8 x i32>, ptr %x2p
1505 %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
1506 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3)
1507 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0
1508 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1
1509 ret { <8 x i32>, <8 x i32> } %res3
}
1512 declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1513 declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1515 define <4 x i32>@test_int_x86_avx512_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
1516 ; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_128:
1518 ; CHECK-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xc2]
1519 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1520 %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
ret <4 x i32> %res
}
1524 define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
1525 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
1527 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1528 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1529 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1530 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1531 ; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x00]
1532 ; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
1533 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1534 ; X86-NEXT: retl # encoding: [0xc3]
1536 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
1538 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1539 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1540 ; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x07]
1541 ; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
1542 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1543 ; X64-NEXT: retq # encoding: [0xc3]
1544 %x2 = load <4 x i32>, ptr %x2p
1545 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
1546 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3)
1547 %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0
1548 %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1
1549 ret { <4 x i32>, <4 x i32> } %res3
}
1552 declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1553 declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1555 define <4 x i64>@test_int_x86_avx512_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
1556 ; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_256:
1558 ; CHECK-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xc2]
1559 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1560 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
ret <4 x i64> %res
}
1564 define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
1565 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
1567 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1568 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1569 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1570 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1571 ; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x00]
1572 ; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
1573 ; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1574 ; X86-NEXT: retl # encoding: [0xc3]
1576 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
1578 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1579 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1580 ; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x07]
1581 ; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
1582 ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1583 ; X64-NEXT: retq # encoding: [0xc3]
1584 %x2 = load <4 x i64>, ptr %x2p
1585 %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
1586 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 %x3)
1587 %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0
1588 %res3 = insertvalue { <4 x i64>, <4 x i64> } %res2, <4 x i64> %res1, 1
1589 ret { <4 x i64>, <4 x i64> } %res3
}
1592 declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1593 declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1595 define <2 x i64>@test_int_x86_avx512_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
1596 ; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_128:
1598 ; CHECK-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xc2]
1599 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1600 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
ret <2 x i64> %res
}
1604 define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
1605 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
1607 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1608 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1609 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1610 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1611 ; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x00]
1612 ; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
1613 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1614 ; X86-NEXT: retl # encoding: [0xc3]
1616 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
1618 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1619 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1620 ; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x07]
1621 ; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
1622 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1623 ; X64-NEXT: retq # encoding: [0xc3]
1624 %x2 = load <2 x i64>, ptr %x2p
1625 %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
1626 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 %x3)
1627 %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0
1628 %res3 = insertvalue { <2 x i64>, <2 x i64> } %res2, <2 x i64> %res1, 1
1629 ret { <2 x i64>, <2 x i64> } %res3
}
1632 declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1633 declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1635 define <16 x i16>@test_int_x86_avx512_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1636 ; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_256:
1638 ; CHECK-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2]
1639 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1640 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
ret <16 x i16> %res
}
1644 define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
1645 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
1647 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1648 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1649 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1650 ; X86-NEXT: vpshrdvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x00]
1651 ; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
1652 ; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1653 ; X86-NEXT: retl # encoding: [0xc3]
1655 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
1657 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1658 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1659 ; X64-NEXT: vpshrdvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x07]
1660 ; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
1661 ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1662 ; X64-NEXT: retq # encoding: [0xc3]
1663 %x2 = load <16 x i16>, ptr %x2p
1664 %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1665 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 %x3)
1666 %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
1667 %res3 = insertvalue { <16 x i16>, <16 x i16> } %res2, <16 x i16> %res1, 1
1668 ret { <16 x i16>, <16 x i16> } %res3
}
1671 declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1672 declare <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1674 define <8 x i16>@test_int_x86_avx512_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1675 ; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_128:
1677 ; CHECK-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xc2]
1678 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1679 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
ret <8 x i16> %res
}
1683 define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
1684 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
1686 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1687 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1688 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1689 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1690 ; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x00]
1691 ; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
1692 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1693 ; X86-NEXT: retl # encoding: [0xc3]
1695 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
1697 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1698 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1699 ; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x07]
1700 ; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
1701 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1702 ; X64-NEXT: retq # encoding: [0xc3]
1703 %x2 = load <8 x i16>, ptr %x2p
1704 %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1705 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 %x3)
1706 %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
1707 %res3 = insertvalue { <8 x i16>, <8 x i16> } %res2, <8 x i16> %res1, 1
1708 ret { <8 x i16>, <8 x i16> } %res3
}
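; The remaining tests mirror the vpshrdv ones for the left-shifting VPSHLDV*,
; again covering d/q/w at 128 and 256 bits with an unmasked, a merge-masked
; (memory operand) and a zero-masked form.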
1711 declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1712 declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
1714 define <8 x i32>@test_int_x86_avx512_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
1715 ; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_256:
1717 ; CHECK-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xc2]
1718 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1719 %res = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
ret <8 x i32> %res
}
1723 define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
1724 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
1726 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1727 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1728 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1729 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1730 ; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x00]
1731 ; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
1732 ; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1733 ; X86-NEXT: retl # encoding: [0xc3]
1735 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
1737 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1738 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1739 ; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x07]
1740 ; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
1741 ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1742 ; X64-NEXT: retq # encoding: [0xc3]
1743 %x2 = load <8 x i32>, ptr %x2p
1744 %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
1745 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3)
1746 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0
1747 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1
1748 ret { <8 x i32>, <8 x i32> } %res3
}
1751 declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1752 declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1754 define <4 x i32>@test_int_x86_avx512_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
1755 ; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_128:
1757 ; CHECK-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xc2]
1758 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1759 %res = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
ret <4 x i32> %res
}
1763 define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
1764 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
1766 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1767 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1768 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1769 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1770 ; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x00]
1771 ; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
1772 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1773 ; X86-NEXT: retl # encoding: [0xc3]
1775 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
1777 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1778 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1779 ; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x07]
1780 ; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
1781 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1782 ; X64-NEXT: retq # encoding: [0xc3]
1783 %x2 = load <4 x i32>, ptr %x2p
1784 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
1785 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3)
1786 %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0
1787 %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1
1788 ret { <4 x i32>, <4 x i32> } %res3
}
1791 declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1792 declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
1794 define <4 x i64>@test_int_x86_avx512_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
1795 ; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_256:
1797 ; CHECK-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xc2]
1798 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1799 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
ret <4 x i64> %res
}
1803 define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
1804 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
1806 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1807 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1808 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1809 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1810 ; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x00]
1811 ; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
1812 ; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1813 ; X86-NEXT: retl # encoding: [0xc3]
1815 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
1817 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1818 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1819 ; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x07]
1820 ; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
1821 ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1822 ; X64-NEXT: retq # encoding: [0xc3]
1823 %x2 = load <4 x i64>, ptr %x2p
1824 %res0 = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
1825 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 %x3)
1826 %res2 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %res0, 0
1827 %res3 = insertvalue { <4 x i64>, <4 x i64> } %res2, <4 x i64> %res1, 1
1828 ret { <4 x i64>, <4 x i64> } %res3
}
1831 declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1832 declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
1834 define <2 x i64>@test_int_x86_avx512_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
1835 ; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_128:
1837 ; CHECK-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xc2]
1838 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1839 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
ret <2 x i64> %res
}
1843 define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
1844 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
1846 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1847 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1848 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1849 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1850 ; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x00]
1851 ; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
1852 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1853 ; X86-NEXT: retl # encoding: [0xc3]
1855 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
1857 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1858 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1859 ; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x07]
1860 ; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
1861 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1862 ; X64-NEXT: retq # encoding: [0xc3]
1863 %x2 = load <2 x i64>, ptr %x2p
1864 %res0 = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
1865 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 %x3)
1866 %res2 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %res0, 0
1867 %res3 = insertvalue { <2 x i64>, <2 x i64> } %res2, <2 x i64> %res1, 1
1868 ret { <2 x i64>, <2 x i64> } %res3
}
1871 declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1872 declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
1874 define <16 x i16>@test_int_x86_avx512_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1875 ; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_256:
1877 ; CHECK-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xc2]
1878 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1879 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
ret <16 x i16> %res
}
1883 define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
1884 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
1886 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1887 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1888 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1889 ; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x00]
1890 ; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
1891 ; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1892 ; X86-NEXT: retl # encoding: [0xc3]
1894 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
1896 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1897 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1898 ; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x07]
1899 ; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
1900 ; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
1901 ; X64-NEXT: retq # encoding: [0xc3]
1902 %x2 = load <16 x i16>, ptr %x2p
1903 %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
1904 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 %x3)
1905 %res2 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0
1906 %res3 = insertvalue { <16 x i16>, <16 x i16> } %res2, <16 x i16> %res1, 1
1907 ret { <16 x i16>, <16 x i16> } %res3
}
1910 declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1911 declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
1913 define <8 x i16>@test_int_x86_avx512_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1914 ; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_128:
1916 ; CHECK-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2]
1917 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1918 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
ret <8 x i16> %res
}
1922 define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
1923 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
1925 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1926 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1927 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1928 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
1929 ; X86-NEXT: vpshldvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x00]
1930 ; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
1931 ; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1932 ; X86-NEXT: retl # encoding: [0xc3]
1934 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
1936 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1937 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1938 ; X64-NEXT: vpshldvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x07]
1939 ; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
1940 ; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
1941 ; X64-NEXT: retq # encoding: [0xc3]
1942 %x2 = load <8 x i16>, ptr %x2p
1943 %res0 = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
1944 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 %x3)
1945 %res2 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0
1946 %res3 = insertvalue { <8 x i16>, <8 x i16> } %res2, <8 x i16> %res1, 1
1947 ret { <8 x i16>, <8 x i16> } %res3
}