; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
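; 128-bit expand tests. @llvm.masked.expandload reads consecutive elements
; from memory into the lanes selected by the mask (lowering to
; vpexpandw/vpexpandb), and @llvm.x86.avx512.mask.expand is the
; register-to-register form; an all-ones mask should fold away entirely.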
define <8 x i16> @test_mask_expand_load_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> %1, <8 x i16> %data)
  ret <8 x i16> %2
}
define <8 x i16> @test_maskz_expand_load_w_128(ptr %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> %1, <8 x i16> zeroinitializer)
  ret <8 x i16> %2
}
define <8 x i16> @test_expand_load_w_128(ptr %addr, <8 x i16> %data) {
; X86-LABEL: test_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr %addr, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %data)
  ret <8 x i16> %1
}

define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_expand_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i16> %1
}
define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
  ret <8 x i16> %2
}
define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
  ret <8 x i16> %2
}
define <16 x i8> @test_mask_expand_load_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> %1, <16 x i8> %data)
  ret <16 x i8> %2
}
define <16 x i8> @test_maskz_expand_load_b_128(ptr %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> %1, <16 x i8> zeroinitializer)
  ret <16 x i8> %2
}
define <16 x i8> @test_expand_load_b_128(ptr %addr, <16 x i8> %data) {
; X86-LABEL: test_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %data)
  ret <16 x i8> %1
}

define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_expand_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i8> %1
}
define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
  ret <16 x i8> %2
}
define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
  ret <16 x i8> %2
}
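; 128-bit compress tests. @llvm.masked.compressstore packs the selected
; lanes and stores them contiguously (vpcompressw/vpcompressb to memory);
; @llvm.x86.avx512.mask.compress is the register-to-register form.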
define void @test_mask_compress_store_w_128(ptr %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %addr, <8 x i1> %1)
  ret void
}
define void @test_compress_store_w_128(ptr %addr, <8 x i16> %data) {
; X86-LABEL: test_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, ptr %addr, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1)
  ret <8 x i16> %2
}
define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1)
  ret <8 x i16> %2
}

define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_compress_w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i16> %1
}
define void @test_mask_compress_store_b_128(ptr %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %addr, <16 x i1> %1)
  ret void
}
define void @test_compress_store_b_128(ptr %addr, <16 x i8> %data) {
; X86-LABEL: test_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1)
  ret <16 x i8> %2
}
define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_b_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1)
  ret <16 x i8> %2
}

define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_compress_b_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i8> %1
}
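; The expand tests above, repeated for 256-bit vectors.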
define <16 x i16> @test_mask_expand_load_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> %1, <16 x i16> %data)
  ret <16 x i16> %2
}
define <16 x i16> @test_maskz_expand_load_w_256(ptr %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> %1, <16 x i16> zeroinitializer)
  ret <16 x i16> %2
}
define <16 x i16> @test_expand_load_w_256(ptr %addr, <16 x i16> %data) {
; X86-LABEL: test_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %data)
  ret <16 x i16> %1
}

define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_expand_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i16> %1
}
define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
  ret <16 x i16> %2
}
define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
  ret <16 x i16> %2
}
define <32 x i8> @test_mask_expand_load_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> %1, <32 x i8> %data)
  ret <32 x i8> %2
}
define <32 x i8> @test_maskz_expand_load_b_256(ptr %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> %1, <32 x i8> zeroinitializer)
  ret <32 x i8> %2
}
define <32 x i8> @test_expand_load_b_256(ptr %addr, <32 x i8> %data) {
; X86-LABEL: test_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %data)
  ret <32 x i8> %1
}

define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_expand_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i8> %1
}
define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
  ret <32 x i8> %2
}
define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
  ret <32 x i8> %2
}
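; The compress tests above, repeated for 256-bit vectors. The store forms
; end with vzeroupper since the functions use ymm registers.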
define void @test_mask_compress_store_w_256(ptr %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %addr, <16 x i1> %1)
  ret void
}
define void @test_compress_store_w_256(ptr %addr, <16 x i16> %data) {
; X86-LABEL: test_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, ptr %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1)
  ret <16 x i16> %2
}
define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1)
  ret <16 x i16> %2
}

define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_compress_w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i16> %1
}
define void @test_mask_compress_store_b_256(ptr %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %addr, <32 x i1> %1)
  ret void
}
define void @test_compress_store_b_256(ptr %addr, <32 x i8> %data) {
; X86-LABEL: test_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, ptr %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1)
  ret <32 x i8> %2
}
define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_b_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1)
  ret <32 x i8> %2
}

define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_compress_b_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i8> %1
}
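; Immediate-count concatenated shifts: vpshldd/vpshldq/vpshldw are expressed
; as @llvm.fshl with a splat constant shift amount, checked with merge- and
; zero-masking via a select on the bitcast mask.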
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
; X86-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X86-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xe1,0x16]
; X64-NEXT:    vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X64-NEXT:    vpshldd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xd1,0x18]
; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
  %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 23, i32 23, i32 23, i32 23>)
  %5 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 24, i32 24, i32 24, i32 24>)
  %6 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1
  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
}
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X86-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X64-NEXT:    vpshldd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
  %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %5 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %6 = insertvalue { <8 x i32>, <8 x i32> } %5, <8 x i32> %4, 1
  ret { <8 x i32>, <8 x i32> } %6
}
define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X86-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X64-NEXT:    vpshldq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
  %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 23, i64 23>)
  %5 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %6 = insertvalue { <2 x i64>, <2 x i64> } %5, <2 x i64> %4, 1
  ret { <2 x i64>, <2 x i64> } %6
}
define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X86-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X64-NEXT:    vpshldq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc9,0x17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
  %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %6 = insertvalue { <4 x i64>, <4 x i64> } %5, <4 x i64> %4, 1
  ret { <4 x i64>, <4 x i64> } %6
}
define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc9,0x07]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
  %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %6 = insertvalue { <8 x i16>, <8 x i16> } %5, <8 x i16> %4, 1
  ret { <8 x i16>, <8 x i16> } %6
}
define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X86-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X64-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc9,0x07]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
  %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %6 = insertvalue { <16 x i16>, <16 x i16> } %5, <16 x i16> %4, 1
  ret { <16 x i16>, <16 x i16> } %6
}
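; vpshrdd/vpshrdq/vpshrdw map to @llvm.fshr with the operands swapped
; relative to the intrinsic argument order (%x1 supplies the high half of
; each concatenated pair).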
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
; X86-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X86-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
; X86-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X86-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xe1,0x16]
; X64-NEXT:    vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X64-NEXT:    vpshrdd $24, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xd1,0x18]
; X64-NEXT:    vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4]
; X64-NEXT:    vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
  %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 23, i32 23, i32 23, i32 23>)
  %5 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 24, i32 24, i32 24, i32 24>)
  %6 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
  %res3 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res3, <4 x i32> %4, 1
  %res5 = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } %res4, <4 x i32> %7, 2
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %res5
}
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdd $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
  %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
  %5 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %6 = insertvalue { <8 x i32>, <8 x i32> } %5, <8 x i32> %4, 1
  ret { <8 x i32>, <8 x i32> } %6
}
define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3
  %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 23, i64 23>)
  %5 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %6 = insertvalue { <2 x i64>, <2 x i64> } %5, <2 x i64> %4, 1
  ret { <2 x i64>, <2 x i64> } %6
}
define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdq $23, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc9,0x17]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3
  %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 23, i64 23, i64 23, i64 23>)
  %5 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %6 = insertvalue { <4 x i64>, <4 x i64> } %5, <4 x i64> %4, 1
  ret { <4 x i64>, <4 x i64> } %6
}
define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X86-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X64-NEXT:    vpshrdw $7, %xmm1, %xmm0, %xmm1 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc9,0x07]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %6 = insertvalue { <8 x i16>, <8 x i16> } %5, <8 x i16> %4, 1
  ret { <8 x i16>, <8 x i16> } %6
}
define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm1 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc9,0x07]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %5 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %6 = insertvalue { <16 x i16>, <16 x i16> } %5, <16 x i16> %4, 1
  ret { <16 x i16>, <16 x i16> } %6
}
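; VPSHRDV* tests: variable per-element shift amounts. Each function runs the
; intrinsic twice, once merge-masked with the amounts loaded from %x2p and
; once zero-masked ({z}) with the amounts passed in register (%x4), so both
; the memory and the register operand encodings are checked.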
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x00]
; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x07]
; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
  %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %res3 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %res4 = insertvalue { <8 x i32>, <8 x i32> } %res3, <8 x i32> %6, 1
  ret { <8 x i32>, <8 x i32> } %res4
}
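; For the narrower d/q variants the i8 mask has more bits than lanes, so the
; IR bitcasts it to <8 x i1> and extracts the low 4 (or 2) elements with a
; shufflevector before the select.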
define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x00]
; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x07]
; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
  %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
  %res3 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %res4 = insertvalue { <4 x i32>, <4 x i32> } %res3, <4 x i32> %6, 1
  ret { <4 x i32>, <4 x i32> } %res4
}
define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x00]
; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x07]
; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i64>, ptr %x2p
  %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
  %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
  %res3 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %res4 = insertvalue { <4 x i64>, <4 x i64> } %res3, <4 x i64> %6, 1
  ret { <4 x i64>, <4 x i64> } %res4
}
define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x00]
; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x07]
; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <2 x i64>, ptr %x2p
  %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
  %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
  %res3 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %res4 = insertvalue { <2 x i64>, <2 x i64> } %res3, <2 x i64> %6, 1
  ret { <2 x i64>, <2 x i64> } %res4
}
define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshrdvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x00]
; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x07]
; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <16 x i16>, ptr %x2p
  %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
  %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %8 = insertvalue { <16 x i16>, <16 x i16> } %7, <16 x i16> %6, 1
  ret { <16 x i16>, <16 x i16> } %8
}
define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x00]
; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x07]
; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i16>, ptr %x2p
  %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
  %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %8 = insertvalue { <8 x i16>, <8 x i16> } %7, <8 x i16> %6, 1
  ret { <8 x i16>, <8 x i16> } %8
}
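; VPSHLDV* tests mirror the VPSHRDV ones but lower from llvm.fshl, with %x0
; and %x1 passed in argument order since fshl's first operand supplies the
; high half of the concatenation.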
define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x00]
; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x07]
; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i32>, ptr %x2p
  %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
  %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %7 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0
  %8 = insertvalue { <8 x i32>, <8 x i32> } %7, <8 x i32> %6, 1
  ret { <8 x i32>, <8 x i32> } %8
}
define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x00]
; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x07]
; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i32>, ptr %x2p
  %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
  %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer
  %7 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0
  %8 = insertvalue { <4 x i32>, <4 x i32> } %7, <4 x i32> %6, 1
  ret { <4 x i32>, <4 x i32> } %8
}
define { <4 x i64>, <4 x i64> } @test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, ptr %x2p, <4 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x00]
; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x07]
; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <4 x i64>, ptr %x2p
  %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
  %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer
  %7 = insertvalue { <4 x i64>, <4 x i64> } poison, <4 x i64> %3, 0
  %8 = insertvalue { <4 x i64>, <4 x i64> } %7, <4 x i64> %6, 1
  ret { <4 x i64>, <4 x i64> } %8
}
define { <2 x i64>, <2 x i64> } @test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, ptr %x2p, <2 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x00]
; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x07]
; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <2 x i64>, ptr %x2p
  %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
  %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer
  %7 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %3, 0
  %8 = insertvalue { <2 x i64>, <2 x i64> } %7, <2 x i64> %6, 1
  ret { <2 x i64>, <2 x i64> } %8
}
define { <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, ptr %x2p, <16 x i16> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x00]
; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x07]
; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <16 x i16>, ptr %x2p
  %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0
  %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0
  %8 = insertvalue { <16 x i16>, <16 x i16> } %7, <16 x i16> %6, 1
  ret { <16 x i16>, <16 x i16> } %8
}
define { <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, ptr %x2p, <8 x i16> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpshldvw (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x00]
; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpshldvw (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x07]
; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  %x2 = load <8 x i16>, ptr %x2p
  %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0
  %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0
  %8 = insertvalue { <8 x i16>, <8 x i16> } %7, <8 x i16> %6, 1
  ret { <8 x i16>, <8 x i16> } %8
}
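; Declarations for the intrinsics exercised above; the llvm.masked.* and
; llvm.x86.avx512.mask.* entries belong to the expand/compress tests earlier
; in the file.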
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>)
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>)
declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>)
declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
declare <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16>, <8 x i16>, <8 x i1>)
declare <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8>, <16 x i8>, <16 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
declare <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16>, <16 x i16>, <16 x i1>)
declare <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8>, <32 x i8>, <32 x i1>)