; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
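
; These tests exercise the AVX512VBMI2 512-bit expand/compress loads and
; stores, the register expand/compress intrinsics, and the VPSHLD/VPSHRD(V)
; funnel-shift intrinsics, checking instruction selection and MC encodings
; for both the i686 and x86_64 triples.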

define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %data)
  ret <32 x i16> %3
}

define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> zeroinitializer)
  ret <32 x i16> %3
}

define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> %data)
  ret <32 x i16> %2
}

define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}

define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}

define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> %1, <64 x i8> %data)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> %1, <64 x i8> zeroinitializer)
  ret <64 x i8> %2
}

define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <64 x i8> %data)
  ret <64 x i8> %1
}

define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}

define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}

define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  %2 = bitcast i32 %mask to <32 x i1>
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> %2)
  ret void
}

define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i8* %addr to i16*
  call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i32 %mask to <32 x i1>
  %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1)
  ret <32 x i16> %2
}

define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <32 x i16> %1
}

define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> %1)
  ret void
}

define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = bitcast i64 %mask to <64 x i1>
  %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1)
  ret <64 x i8> %2
}

define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <64 x i8> %1
}

define <16 x i32> @test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}

define <8 x i64> @test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}

define <32 x i16> @test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x06]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x06]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

define <16 x i32> @test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}

define <8 x i64> @test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> <i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22, i64 22>)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}

define <32 x i16> @test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x06]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x06]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}

define <16 x i32> @test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xe2]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xe2]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x4)
  %5 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x4)
  %6 = bitcast i16 %x3 to <16 x i1>
  %7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %4
  %res4 = add <16 x i32> %7, %res3
  ret <16 x i32> %res4
}

define <8 x i64> @test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xe2]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xe2]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %1 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x4)
  %5 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x4)
  %6 = bitcast i8 %x3 to <8 x i1>
  %7 = select <8 x i1> %6, <8 x i64> %5, <8 x i64> zeroinitializer
  %res3 = add <8 x i64> %3, %4
  %res4 = add <8 x i64> %7, %res3
  ret <8 x i64> %res4
}

define <32 x i16> @test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xe2]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2]
; X86-NEXT:    vpaddw %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfd,0xc0]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xe2]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xc2]
; X64-NEXT:    vpaddw %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfd,0xc0]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %1 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x4)
  %5 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x4)
  %6 = bitcast i32 %x3 to <32 x i1>
  %7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> zeroinitializer
  %res3 = add <32 x i16> %3, %4
  %res4 = add <32 x i16> %7, %res3
  ret <32 x i16> %res4
}

define <16 x i32> @test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xe2]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xe2]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %1 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %5 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  %6 = bitcast i16 %x3 to <16 x i1>
  %7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> zeroinitializer
  %res3 = add <16 x i32> %3, %4
  %res4 = add <16 x i32> %7, %res3
  ret <16 x i32> %res4
}

define <8 x i64> @test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xe2]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xe2]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
  %4 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4)
  %5 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4)
  %6 = bitcast i8 %x3 to <8 x i1>
  %7 = select <8 x i1> %6, <8 x i64> %5, <8 x i64> zeroinitializer
  %res3 = add <8 x i64> %3, %4
  %res4 = add <8 x i64> %7, %res3
  ret <8 x i64> %res4
}

define <32 x i16> @test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xe2]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2]
; X86-NEXT:    vpaddw %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfd,0xc0]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xe2]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xc2]
; X64-NEXT:    vpaddw %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfd,0xc0]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %1 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x0
  %4 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4)
  %5 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4)
  %6 = bitcast i32 %x3 to <32 x i1>
  %7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> zeroinitializer
  %res3 = add <32 x i16> %3, %4
  %res4 = add <32 x i16> %7, %res3
  ret <32 x i16> %res4
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16>, <32 x i16>, <32 x i1>)
declare <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8>, <64 x i8>, <64 x i1>)