1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
5 define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
6 ; X86-LABEL: test_mask_expand_load_w_512:
8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
10 ; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
11 ; X86-NEXT: retl # encoding: [0xc3]
13 ; X64-LABEL: test_mask_expand_load_w_512:
15 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
16 ; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
17 ; X64-NEXT: retq # encoding: [0xc3]
18 %1 = bitcast i8* %addr to i16*
19 %2 = bitcast i32 %mask to <32 x i1>
20 %3 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %data)
24 define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
25 ; X86-LABEL: test_maskz_expand_load_w_512:
27 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
28 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
29 ; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
30 ; X86-NEXT: retl # encoding: [0xc3]
32 ; X64-LABEL: test_maskz_expand_load_w_512:
34 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
35 ; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
36 ; X64-NEXT: retq # encoding: [0xc3]
37 %1 = bitcast i8* %addr to i16*
38 %2 = bitcast i32 %mask to <32 x i1>
39 %3 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> zeroinitializer)
43 define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
44 ; X86-LABEL: test_expand_load_w_512:
46 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
47 ; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
48 ; X86-NEXT: vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
49 ; X86-NEXT: retl # encoding: [0xc3]
51 ; X64-LABEL: test_expand_load_w_512:
53 ; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
54 ; X64-NEXT: vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
55 ; X64-NEXT: retq # encoding: [0xc3]
56 %1 = bitcast i8* %addr to i16*
57 %2 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> %data)
61 define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
62 ; CHECK-LABEL: test_expand_w_512:
64 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
65 %1 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
69 define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
70 ; X86-LABEL: test_mask_expand_w_512:
72 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
73 ; X86-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
74 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
75 ; X86-NEXT: retl # encoding: [0xc3]
77 ; X64-LABEL: test_mask_expand_w_512:
79 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
80 ; X64-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
81 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
82 ; X64-NEXT: retq # encoding: [0xc3]
83 %1 = bitcast i32 %mask to <32 x i1>
84 %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
88 define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
89 ; X86-LABEL: test_maskz_expand_w_512:
91 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
92 ; X86-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
93 ; X86-NEXT: retl # encoding: [0xc3]
95 ; X64-LABEL: test_maskz_expand_w_512:
97 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
98 ; X64-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
99 ; X64-NEXT: retq # encoding: [0xc3]
100 %1 = bitcast i32 %mask to <32 x i1>
101 %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
105 define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
106 ; X86-LABEL: test_mask_expand_load_b_512:
108 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
109 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
110 ; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
111 ; X86-NEXT: retl # encoding: [0xc3]
113 ; X64-LABEL: test_mask_expand_load_b_512:
115 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
116 ; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
117 ; X64-NEXT: retq # encoding: [0xc3]
118 %1 = bitcast i64 %mask to <64 x i1>
119 %2 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> %1, <64 x i8> %data)
123 define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
124 ; X86-LABEL: test_maskz_expand_load_b_512:
126 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
127 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
128 ; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
129 ; X86-NEXT: retl # encoding: [0xc3]
131 ; X64-LABEL: test_maskz_expand_load_b_512:
133 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
134 ; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
135 ; X64-NEXT: retq # encoding: [0xc3]
136 %1 = bitcast i64 %mask to <64 x i1>
137 %2 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> %1, <64 x i8> zeroinitializer)
141 define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
142 ; X86-LABEL: test_expand_load_b_512:
144 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
145 ; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
146 ; X86-NEXT: vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
147 ; X86-NEXT: retl # encoding: [0xc3]
149 ; X64-LABEL: test_expand_load_b_512:
151 ; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
152 ; X64-NEXT: vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
153 ; X64-NEXT: retq # encoding: [0xc3]
154 %1 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <64 x i8> %data)
158 define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
159 ; CHECK-LABEL: test_expand_b_512:
161 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
162 %1 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
166 define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
167 ; X86-LABEL: test_mask_expand_b_512:
169 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
170 ; X86-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
171 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
172 ; X86-NEXT: retl # encoding: [0xc3]
174 ; X64-LABEL: test_mask_expand_b_512:
176 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
177 ; X64-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
178 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
179 ; X64-NEXT: retq # encoding: [0xc3]
180 %1 = bitcast i64 %mask to <64 x i1>
181 %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
185 define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
186 ; X86-LABEL: test_maskz_expand_b_512:
188 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
189 ; X86-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
190 ; X86-NEXT: retl # encoding: [0xc3]
192 ; X64-LABEL: test_maskz_expand_b_512:
194 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
195 ; X64-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
196 ; X64-NEXT: retq # encoding: [0xc3]
197 %1 = bitcast i64 %mask to <64 x i1>
198 %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
202 define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
203 ; X86-LABEL: test_mask_compress_store_w_512:
205 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
206 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
207 ; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
208 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
209 ; X86-NEXT: retl # encoding: [0xc3]
211 ; X64-LABEL: test_mask_compress_store_w_512:
213 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
214 ; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
215 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
216 ; X64-NEXT: retq # encoding: [0xc3]
217 %1 = bitcast i8* %addr to i16*
218 %2 = bitcast i32 %mask to <32 x i1>
219 call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> %2)
223 define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
224 ; X86-LABEL: test_compress_store_w_512:
226 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
227 ; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
228 ; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
229 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
230 ; X86-NEXT: retl # encoding: [0xc3]
232 ; X64-LABEL: test_compress_store_w_512:
234 ; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
235 ; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
236 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
237 ; X64-NEXT: retq # encoding: [0xc3]
238 %1 = bitcast i8* %addr to i16*
239 call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
243 define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
244 ; X86-LABEL: test_mask_compress_w_512:
246 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
247 ; X86-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
248 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
249 ; X86-NEXT: retl # encoding: [0xc3]
251 ; X64-LABEL: test_mask_compress_w_512:
253 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
254 ; X64-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
255 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
256 ; X64-NEXT: retq # encoding: [0xc3]
257 %1 = bitcast i32 %mask to <32 x i1>
258 %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
262 define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
263 ; X86-LABEL: test_maskz_compress_w_512:
265 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
266 ; X86-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
267 ; X86-NEXT: retl # encoding: [0xc3]
269 ; X64-LABEL: test_maskz_compress_w_512:
271 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
272 ; X64-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
273 ; X64-NEXT: retq # encoding: [0xc3]
274 %1 = bitcast i32 %mask to <32 x i1>
275 %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
279 define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
280 ; CHECK-LABEL: test_compress_w_512:
282 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
283 %1 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
287 define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
288 ; X86-LABEL: test_mask_compress_store_b_512:
290 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
291 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
292 ; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
293 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
294 ; X86-NEXT: retl # encoding: [0xc3]
296 ; X64-LABEL: test_mask_compress_store_b_512:
298 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
299 ; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
300 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
301 ; X64-NEXT: retq # encoding: [0xc3]
302 %1 = bitcast i64 %mask to <64 x i1>
303 call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> %1)
307 define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
308 ; X86-LABEL: test_compress_store_b_512:
310 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
311 ; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
312 ; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
313 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
314 ; X86-NEXT: retl # encoding: [0xc3]
316 ; X64-LABEL: test_compress_store_b_512:
318 ; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
319 ; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
320 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
321 ; X64-NEXT: retq # encoding: [0xc3]
322 call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
326 define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
327 ; X86-LABEL: test_mask_compress_b_512:
329 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
330 ; X86-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
331 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
332 ; X86-NEXT: retl # encoding: [0xc3]
334 ; X64-LABEL: test_mask_compress_b_512:
336 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
337 ; X64-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
338 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
339 ; X64-NEXT: retq # encoding: [0xc3]
340 %1 = bitcast i64 %mask to <64 x i1>
341 %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
345 define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
346 ; X86-LABEL: test_maskz_compress_b_512:
348 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
349 ; X86-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
350 ; X86-NEXT: retl # encoding: [0xc3]
352 ; X64-LABEL: test_maskz_compress_b_512:
354 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
355 ; X64-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
356 ; X64-NEXT: retq # encoding: [0xc3]
357 %1 = bitcast i64 %mask to <64 x i1>
358 %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
362 define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
363 ; CHECK-LABEL: test_compress_b_512:
365 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
366 %1 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
370 define <16 x i32> @test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
371 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
373 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
374 ; X86-NEXT: vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
375 ; X86-NEXT: vpshldd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc1,0x17]
376 ; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
377 ; X86-NEXT: retl # encoding: [0xc3]
379 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
381 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
382 ; X64-NEXT: vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
383 ; X64-NEXT: vpshldd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xc1,0x17]
384 ; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
385 ; X64-NEXT: retq # encoding: [0xc3]
386 %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
387 %2 = bitcast i16 %x4 to <16 x i1>
388 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
389 %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
390 %res2 = add <16 x i32> %3, %4
394 define <8 x i64> @test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
395 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
397 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
398 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
399 ; X86-NEXT: vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
400 ; X86-NEXT: vpshldq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc1,0x17]
401 ; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
402 ; X86-NEXT: retl # encoding: [0xc3]
404 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
406 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
407 ; X64-NEXT: vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
408 ; X64-NEXT: vpshldq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xc1,0x17]
409 ; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
410 ; X64-NEXT: retq # encoding: [0xc3]
411 %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
412 %2 = bitcast i8 %x4 to <8 x i1>
413 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
414 %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
415 %res2 = add <8 x i64> %3, %4
419 define <32 x i16> @test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
420 ; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
422 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
423 ; X86-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
424 ; X86-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc1,0x07]
425 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
426 ; X86-NEXT: retl # encoding: [0xc3]
428 ; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
430 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
431 ; X64-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
432 ; X64-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xc1,0x07]
433 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
434 ; X64-NEXT: retq # encoding: [0xc3]
435 %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
436 %2 = bitcast i32 %x4 to <32 x i1>
437 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
438 %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
439 %res2 = add <32 x i16> %3, %4
443 define <16 x i32> @test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
444 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
446 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
447 ; X86-NEXT: vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
448 ; X86-NEXT: vpshrdd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc1,0x17]
449 ; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
450 ; X86-NEXT: retl # encoding: [0xc3]
452 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
454 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
455 ; X64-NEXT: vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
456 ; X64-NEXT: vpshrdd $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xc1,0x17]
457 ; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
458 ; X64-NEXT: retq # encoding: [0xc3]
459 %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
460 %2 = bitcast i16 %x4 to <16 x i1>
461 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
462 %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>)
463 %res2 = add <16 x i32> %3, %4
467 define <8 x i64> @test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
468 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
470 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
471 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
472 ; X86-NEXT: vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
473 ; X86-NEXT: vpshrdq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc1,0x17]
474 ; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
475 ; X86-NEXT: retl # encoding: [0xc3]
477 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
479 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
480 ; X64-NEXT: vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
481 ; X64-NEXT: vpshrdq $23, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xc1,0x17]
482 ; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
483 ; X64-NEXT: retq # encoding: [0xc3]
484 %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
485 %2 = bitcast i8 %x4 to <8 x i1>
486 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
487 %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23, i64 23>)
488 %res2 = add <8 x i64> %3, %4
492 define <32 x i16> @test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
493 ; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
495 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
496 ; X86-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
497 ; X86-NEXT: vpshrdw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc1,0x07]
498 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
499 ; X86-NEXT: retl # encoding: [0xc3]
501 ; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
503 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
504 ; X64-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
505 ; X64-NEXT: vpshrdw $7, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xc1,0x07]
506 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
507 ; X64-NEXT: retq # encoding: [0xc3]
508 %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
509 %2 = bitcast i32 %x4 to <32 x i1>
510 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
511 %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
512 %res2 = add <32 x i16> %3, %4
516 define <16 x i32> @test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
517 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
519 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
520 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
521 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
522 ; X86-NEXT: vpshrdvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x18]
523 ; X86-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
524 ; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
525 ; X86-NEXT: retl # encoding: [0xc3]
527 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
529 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
530 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
531 ; X64-NEXT: vpshrdvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x1f]
532 ; X64-NEXT: vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
533 ; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
534 ; X64-NEXT: retq # encoding: [0xc3]
535 %x2 = load <16 x i32>, <16 x i32>* %x2p
536 %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
537 %2 = bitcast i16 %x3 to <16 x i1>
538 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
539 %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x4)
540 %5 = bitcast i16 %x3 to <16 x i1>
541 %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
542 %res3 = add <16 x i32> %3, %6
546 define <8 x i64> @test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
547 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
549 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
550 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
551 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
552 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
553 ; X86-NEXT: vpshrdvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x18]
554 ; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
555 ; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
556 ; X86-NEXT: retl # encoding: [0xc3]
558 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
560 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
561 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
562 ; X64-NEXT: vpshrdvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x1f]
563 ; X64-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
564 ; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
565 ; X64-NEXT: retq # encoding: [0xc3]
566 %x2 = load <8 x i64>, <8 x i64>* %x2p
567 %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
568 %2 = bitcast i8 %x3 to <8 x i1>
569 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
570 %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x4)
571 %5 = bitcast i8 %x3 to <8 x i1>
572 %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
573 %res3 = add <8 x i64> %3, %6
577 define <32 x i16> @test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
578 ; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
580 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
581 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
582 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
583 ; X86-NEXT: vpshrdvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x18]
584 ; X86-NEXT: vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2]
585 ; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
586 ; X86-NEXT: retl # encoding: [0xc3]
588 ; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
590 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
591 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
592 ; X64-NEXT: vpshrdvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x1f]
593 ; X64-NEXT: vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2]
594 ; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
595 ; X64-NEXT: retq # encoding: [0xc3]
596 %x2 = load <32 x i16>, <32 x i16>* %x2p
597 %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
598 %2 = bitcast i32 %x3 to <32 x i1>
599 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
600 %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x4)
601 %5 = bitcast i32 %x3 to <32 x i1>
602 %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
603 %res3 = add <32 x i16> %3, %6
607 define <16 x i32> @test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
608 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
610 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
611 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
612 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
613 ; X86-NEXT: vpshldvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x18]
614 ; X86-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
615 ; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
616 ; X86-NEXT: retl # encoding: [0xc3]
618 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
620 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
621 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
622 ; X64-NEXT: vpshldvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x1f]
623 ; X64-NEXT: vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
624 ; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
625 ; X64-NEXT: retq # encoding: [0xc3]
626 %x2 = load <16 x i32>, <16 x i32>* %x2p
627 %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
628 %2 = bitcast i16 %x3 to <16 x i1>
629 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
630 %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
631 %5 = bitcast i16 %x3 to <16 x i1>
632 %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
633 %res3 = add <16 x i32> %3, %6
637 define <8 x i64> @test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
638 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
640 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
641 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
642 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
643 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
644 ; X86-NEXT: vpshldvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x18]
645 ; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
646 ; X86-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
647 ; X86-NEXT: retl # encoding: [0xc3]
649 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
651 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
652 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
653 ; X64-NEXT: vpshldvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x1f]
654 ; X64-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
655 ; X64-NEXT: vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
656 ; X64-NEXT: retq # encoding: [0xc3]
657 %x2 = load <8 x i64>, <8 x i64>* %x2p
658 %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
659 %2 = bitcast i8 %x3 to <8 x i1>
660 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
661 %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4)
662 %5 = bitcast i8 %x3 to <8 x i1>
663 %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
664 %res3 = add <8 x i64> %3, %6
668 define <32 x i16> @test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
669 ; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
671 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
672 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
673 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
674 ; X86-NEXT: vpshldvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x18]
675 ; X86-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2]
676 ; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
677 ; X86-NEXT: retl # encoding: [0xc3]
679 ; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
681 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
682 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
683 ; X64-NEXT: vpshldvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x1f]
684 ; X64-NEXT: vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2]
685 ; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
686 ; X64-NEXT: retq # encoding: [0xc3]
687 %x2 = load <32 x i16>, <32 x i16>* %x2p
688 %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
689 %2 = bitcast i32 %x3 to <32 x i1>
690 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
691 %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4)
692 %5 = bitcast i32 %x3 to <32 x i1>
693 %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
694 %res3 = add <32 x i16> %3, %6
698 declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
699 declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
700 declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
701 declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
702 declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
703 declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
704 declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
705 declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
706 declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
707 declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
708 declare <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
709 declare <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)
710 declare <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
711 declare <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)