1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
3 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
; Register-register, unmasked: %a and %b are passed straight to
; @llvm.x86.avx512.packssdw.512. Both runs are expected to emit a single
; vpackssdw with zmm register operands.
5 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
6 ; AVX512BW-LABEL: test_mask_packs_epi32_rr_512:
8 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
11 ; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512:
13 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
14 ; AVX512F-32-NEXT: retl
15 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
; Register-register, merge-masked: %mask is bitcast to <32 x i1> and selects
; between the packssdw result and %passThru. The checks expect a {%k1}-masked
; vpackssdw writing zmm2 followed by a vmovdqa64 copy into zmm0; the mask is
; moved from %edi on the x86_64 run and from a stack slot on the i386 run.
19 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
20 ; AVX512BW-LABEL: test_mask_packs_epi32_rrk_512:
22 ; AVX512BW-NEXT: kmovd %edi, %k1
23 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
24 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
27 ; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512:
29 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
30 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
31 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
32 ; AVX512F-32-NEXT: retl
33 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
34 %2 = bitcast i32 %mask to <32 x i1>
35 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Register-register, zero-masked: %mask is bitcast to <32 x i1> and selects
; between the packssdw result and zeroinitializer. The checks expect the
; {%k1} {z} form of vpackssdw writing zmm0 directly.
39 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
40 ; AVX512BW-LABEL: test_mask_packs_epi32_rrkz_512:
42 ; AVX512BW-NEXT: kmovd %edi, %k1
43 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
46 ; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512:
48 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
49 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
50 ; AVX512F-32-NEXT: retl
51 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
52 %2 = bitcast i32 %mask to <32 x i1>
53 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Full-vector memory operand, unmasked: %b is loaded from %ptr_b before the
; call. The checks expect the load to appear as vpackssdw's memory operand:
; (%rdi) on the x86_64 run, (%eax) after a movl from the stack on the i386 run.
57 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
58 ; AVX512BW-LABEL: test_mask_packs_epi32_rm_512:
60 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0
63 ; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512:
65 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
66 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0
67 ; AVX512F-32-NEXT: retl
68 %b = load <16 x i32>, <16 x i32>* %ptr_b
69 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
; Full-vector memory operand, merge-masked: %b is loaded from %ptr_b, and
; %mask (bitcast to <32 x i1>) selects between the packssdw result and
; %passThru. The checks expect a {%k1}-masked vpackssdw with a memory operand
; writing zmm1, followed by a vmovdqa64 copy into zmm0.
73 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
74 ; AVX512BW-LABEL: test_mask_packs_epi32_rmk_512:
76 ; AVX512BW-NEXT: kmovd %esi, %k1
77 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1}
78 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
81 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512:
83 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
84 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
85 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1}
86 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
87 ; AVX512F-32-NEXT: retl
88 %b = load <16 x i32>, <16 x i32>* %ptr_b
89 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
90 %2 = bitcast i32 %mask to <32 x i1>
91 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Full-vector memory operand, zero-masked: %b is loaded from %ptr_b, and
; %mask (bitcast to <32 x i1>) selects between the packssdw result and
; zeroinitializer. The checks expect the {%k1} {z} form of vpackssdw with a
; memory operand, writing zmm0 directly.
95 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
96 ; AVX512BW-LABEL: test_mask_packs_epi32_rmkz_512:
98 ; AVX512BW-NEXT: kmovd %esi, %k1
99 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z}
100 ; AVX512BW-NEXT: retq
102 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512:
103 ; AVX512F-32: # BB#0:
104 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
105 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
106 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z}
107 ; AVX512F-32-NEXT: retl
108 %b = load <16 x i32>, <16 x i32>* %ptr_b
109 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
110 %2 = bitcast i32 %mask to <32 x i1>
111 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Broadcast memory operand, unmasked: an i32 loaded from %ptr_b is splatted to
; all 16 lanes (insertelement into lane 0, then shufflevector with an all-zero
; shuffle mask) and used as %b. The checks expect vpackssdw with a {1to16}
; broadcast memory operand.
115 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
116 ; AVX512BW-LABEL: test_mask_packs_epi32_rmb_512:
118 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0
119 ; AVX512BW-NEXT: retq
121 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512:
122 ; AVX512F-32: # BB#0:
123 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
124 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0
125 ; AVX512F-32-NEXT: retl
126 %q = load i32, i32* %ptr_b
127 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
128 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
129 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
; Broadcast memory operand, merge-masked: %b is a 16-lane splat of an i32
; loaded from %ptr_b, and %mask (bitcast to <32 x i1>) selects between the
; packssdw result and %passThru. The checks expect a {%k1}-masked vpackssdw
; with a {1to16} memory operand writing zmm1, then a vmovdqa64 copy into zmm0.
133 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
134 ; AVX512BW-LABEL: test_mask_packs_epi32_rmbk_512:
136 ; AVX512BW-NEXT: kmovd %esi, %k1
137 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
138 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
139 ; AVX512BW-NEXT: retq
141 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512:
142 ; AVX512F-32: # BB#0:
143 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
144 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
145 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
146 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
147 ; AVX512F-32-NEXT: retl
148 %q = load i32, i32* %ptr_b
149 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
150 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
151 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
152 %2 = bitcast i32 %mask to <32 x i1>
153 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Broadcast memory operand, zero-masked: %b is a 16-lane splat of an i32
; loaded from %ptr_b, and %mask (bitcast to <32 x i1>) selects between the
; packssdw result and zeroinitializer. The checks expect the {%k1} {z} form of
; vpackssdw with a {1to16} memory operand.
157 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
158 ; AVX512BW-LABEL: test_mask_packs_epi32_rmbkz_512:
160 ; AVX512BW-NEXT: kmovd %esi, %k1
161 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
162 ; AVX512BW-NEXT: retq
164 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512:
165 ; AVX512F-32: # BB#0:
166 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
167 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
168 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
169 ; AVX512F-32-NEXT: retl
170 %q = load i32, i32* %ptr_b
171 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
172 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
173 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
174 %2 = bitcast i32 %mask to <32 x i1>
175 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
179 declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>)
; Register-register, unmasked: %a and %b are passed straight to
; @llvm.x86.avx512.packsswb.512. Both runs are expected to emit a single
; vpacksswb with zmm register operands.
181 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
182 ; AVX512BW-LABEL: test_mask_packs_epi16_rr_512:
184 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
185 ; AVX512BW-NEXT: retq
187 ; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512:
188 ; AVX512F-32: # BB#0:
189 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
190 ; AVX512F-32-NEXT: retl
191 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
; Register-register, merge-masked: the i64 %mask is bitcast to <64 x i1> and
; selects between the packsswb result and %passThru. The checks expect a kmovq
; into k1 (from %rdi on the x86_64 run, from a stack slot on the i386 run), a
; {%k1}-masked vpacksswb writing zmm2, and a vmovdqa64 copy into zmm0.
195 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
196 ; AVX512BW-LABEL: test_mask_packs_epi16_rrk_512:
198 ; AVX512BW-NEXT: kmovq %rdi, %k1
199 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
200 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
201 ; AVX512BW-NEXT: retq
203 ; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512:
204 ; AVX512F-32: # BB#0:
205 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
206 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
207 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
208 ; AVX512F-32-NEXT: retl
209 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
210 %2 = bitcast i64 %mask to <64 x i1>
211 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
; Register-register, zero-masked: the i64 %mask is bitcast to <64 x i1> and
; selects between the packsswb result and zeroinitializer. The checks expect
; the {%k1} {z} form of vpacksswb writing zmm0 directly.
215 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
216 ; AVX512BW-LABEL: test_mask_packs_epi16_rrkz_512:
218 ; AVX512BW-NEXT: kmovq %rdi, %k1
219 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
220 ; AVX512BW-NEXT: retq
222 ; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512:
223 ; AVX512F-32: # BB#0:
224 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
225 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
226 ; AVX512F-32-NEXT: retl
227 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
228 %2 = bitcast i64 %mask to <64 x i1>
229 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
; Full-vector memory operand, unmasked: %b is loaded from %ptr_b before the
; call. The checks expect the load to appear as vpacksswb's memory operand:
; (%rdi) on the x86_64 run, (%eax) after a movl from the stack on the i386 run.
233 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
234 ; AVX512BW-LABEL: test_mask_packs_epi16_rm_512:
236 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0
237 ; AVX512BW-NEXT: retq
239 ; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512:
240 ; AVX512F-32: # BB#0:
241 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
242 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0
243 ; AVX512F-32-NEXT: retl
244 %b = load <32 x i16>, <32 x i16>* %ptr_b
245 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
; Full-vector memory operand, merge-masked: %b is loaded from %ptr_b, and the
; i64 %mask (bitcast to <64 x i1>) selects between the packsswb result and
; %passThru. The checks expect a {%k1}-masked vpacksswb with a memory operand
; writing zmm1, followed by a vmovdqa64 copy into zmm0.
249 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
250 ; AVX512BW-LABEL: test_mask_packs_epi16_rmk_512:
252 ; AVX512BW-NEXT: kmovq %rsi, %k1
253 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1}
254 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
255 ; AVX512BW-NEXT: retq
257 ; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512:
258 ; AVX512F-32: # BB#0:
259 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
260 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
261 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1}
262 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
263 ; AVX512F-32-NEXT: retl
264 %b = load <32 x i16>, <32 x i16>* %ptr_b
265 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
266 %2 = bitcast i64 %mask to <64 x i1>
267 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
; Full-vector memory operand, zero-masked: %b is loaded from %ptr_b, and the
; i64 %mask (bitcast to <64 x i1>) selects between the packsswb result and
; zeroinitializer. The checks expect the {%k1} {z} form of vpacksswb with a
; memory operand, writing zmm0 directly.
271 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
272 ; AVX512BW-LABEL: test_mask_packs_epi16_rmkz_512:
274 ; AVX512BW-NEXT: kmovq %rsi, %k1
275 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z}
276 ; AVX512BW-NEXT: retq
278 ; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512:
279 ; AVX512F-32: # BB#0:
280 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
281 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
282 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z}
283 ; AVX512F-32-NEXT: retl
284 %b = load <32 x i16>, <32 x i16>* %ptr_b
285 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
286 %2 = bitcast i64 %mask to <64 x i1>
287 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
291 declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>)
; Register-register, unmasked: %a and %b are passed straight to
; @llvm.x86.avx512.packusdw.512. Both runs are expected to emit a single
; vpackusdw with zmm register operands.
294 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
295 ; AVX512BW-LABEL: test_mask_packus_epi32_rr_512:
297 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
298 ; AVX512BW-NEXT: retq
300 ; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512:
301 ; AVX512F-32: # BB#0:
302 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
303 ; AVX512F-32-NEXT: retl
304 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
; Register-register, merge-masked: %mask is bitcast to <32 x i1> and selects
; between the packusdw result and %passThru. The checks expect a {%k1}-masked
; vpackusdw writing zmm2 followed by a vmovdqa64 copy into zmm0; the mask is
; moved from %edi on the x86_64 run and from a stack slot on the i386 run.
308 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
309 ; AVX512BW-LABEL: test_mask_packus_epi32_rrk_512:
311 ; AVX512BW-NEXT: kmovd %edi, %k1
312 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
313 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
314 ; AVX512BW-NEXT: retq
316 ; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512:
317 ; AVX512F-32: # BB#0:
318 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
319 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
320 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
321 ; AVX512F-32-NEXT: retl
322 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
323 %2 = bitcast i32 %mask to <32 x i1>
324 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Register-register, zero-masked: %mask is bitcast to <32 x i1> and selects
; between the packusdw result and zeroinitializer. The checks expect the
; {%k1} {z} form of vpackusdw writing zmm0 directly.
328 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
329 ; AVX512BW-LABEL: test_mask_packus_epi32_rrkz_512:
331 ; AVX512BW-NEXT: kmovd %edi, %k1
332 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
333 ; AVX512BW-NEXT: retq
335 ; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512:
336 ; AVX512F-32: # BB#0:
337 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
338 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
339 ; AVX512F-32-NEXT: retl
340 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
341 %2 = bitcast i32 %mask to <32 x i1>
342 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Full-vector memory operand, unmasked: %b is loaded from %ptr_b before the
; call. The checks expect the load to appear as vpackusdw's memory operand:
; (%rdi) on the x86_64 run, (%eax) after a movl from the stack on the i386 run.
346 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
347 ; AVX512BW-LABEL: test_mask_packus_epi32_rm_512:
349 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0
350 ; AVX512BW-NEXT: retq
352 ; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512:
353 ; AVX512F-32: # BB#0:
354 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
355 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0
356 ; AVX512F-32-NEXT: retl
357 %b = load <16 x i32>, <16 x i32>* %ptr_b
358 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
; Full-vector memory operand, merge-masked: %b is loaded from %ptr_b, and
; %mask (bitcast to <32 x i1>) selects between the packusdw result and
; %passThru. The checks expect a {%k1}-masked vpackusdw with a memory operand
; writing zmm1, followed by a vmovdqa64 copy into zmm0.
362 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
363 ; AVX512BW-LABEL: test_mask_packus_epi32_rmk_512:
365 ; AVX512BW-NEXT: kmovd %esi, %k1
366 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
367 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
368 ; AVX512BW-NEXT: retq
370 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512:
371 ; AVX512F-32: # BB#0:
372 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
373 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
374 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1}
375 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
376 ; AVX512F-32-NEXT: retl
377 %b = load <16 x i32>, <16 x i32>* %ptr_b
378 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
379 %2 = bitcast i32 %mask to <32 x i1>
380 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Full-vector memory operand, zero-masked: %b is loaded from %ptr_b, and
; %mask (bitcast to <32 x i1>) selects between the packusdw result and
; zeroinitializer. The checks expect the {%k1} {z} form of vpackusdw with a
; memory operand, writing zmm0 directly.
384 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
385 ; AVX512BW-LABEL: test_mask_packus_epi32_rmkz_512:
387 ; AVX512BW-NEXT: kmovd %esi, %k1
388 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
389 ; AVX512BW-NEXT: retq
391 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512:
392 ; AVX512F-32: # BB#0:
393 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
394 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
395 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z}
396 ; AVX512F-32-NEXT: retl
397 %b = load <16 x i32>, <16 x i32>* %ptr_b
398 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
399 %2 = bitcast i32 %mask to <32 x i1>
400 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Broadcast memory operand, unmasked: an i32 loaded from %ptr_b is splatted to
; all 16 lanes (insertelement into lane 0, then shufflevector with an all-zero
; shuffle mask) and used as %b. The checks expect vpackusdw with a {1to16}
; broadcast memory operand.
404 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
405 ; AVX512BW-LABEL: test_mask_packus_epi32_rmb_512:
407 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
408 ; AVX512BW-NEXT: retq
410 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512:
411 ; AVX512F-32: # BB#0:
412 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
413 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0
414 ; AVX512F-32-NEXT: retl
415 %q = load i32, i32* %ptr_b
416 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
417 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
418 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
; Broadcast memory operand, merge-masked: %b is a 16-lane splat of an i32
; loaded from %ptr_b, and %mask (bitcast to <32 x i1>) selects between the
; packusdw result and %passThru. The checks expect a {%k1}-masked vpackusdw
; with a {1to16} memory operand writing zmm1, then a vmovdqa64 copy into zmm0.
422 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
423 ; AVX512BW-LABEL: test_mask_packus_epi32_rmbk_512:
425 ; AVX512BW-NEXT: kmovd %esi, %k1
426 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
427 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
428 ; AVX512BW-NEXT: retq
430 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512:
431 ; AVX512F-32: # BB#0:
432 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
433 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
434 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
435 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
436 ; AVX512F-32-NEXT: retl
437 %q = load i32, i32* %ptr_b
438 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
439 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
440 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
441 %2 = bitcast i32 %mask to <32 x i1>
442 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Broadcast memory operand, zero-masked: %b is a 16-lane splat of an i32
; loaded from %ptr_b, and %mask (bitcast to <32 x i1>) selects between the
; packusdw result and zeroinitializer. The checks expect the {%k1} {z} form of
; vpackusdw with a {1to16} memory operand.
446 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
447 ; AVX512BW-LABEL: test_mask_packus_epi32_rmbkz_512:
449 ; AVX512BW-NEXT: kmovd %esi, %k1
450 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
451 ; AVX512BW-NEXT: retq
453 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512:
454 ; AVX512F-32: # BB#0:
455 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
456 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
457 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
458 ; AVX512F-32-NEXT: retl
459 %q = load i32, i32* %ptr_b
460 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
461 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
462 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
463 %2 = bitcast i32 %mask to <32 x i1>
464 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
468 declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>)
; Register-register, unmasked: %a and %b are passed straight to
; @llvm.x86.avx512.packuswb.512. Both runs are expected to emit a single
; vpackuswb with zmm register operands.
470 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
471 ; AVX512BW-LABEL: test_mask_packus_epi16_rr_512:
473 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
474 ; AVX512BW-NEXT: retq
476 ; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512:
477 ; AVX512F-32: # BB#0:
478 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
479 ; AVX512F-32-NEXT: retl
480 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
; Register-register, merge-masked: the i64 %mask is bitcast to <64 x i1> and
; selects between the packuswb result and %passThru. The checks expect a kmovq
; into k1 (from %rdi on the x86_64 run, from a stack slot on the i386 run), a
; {%k1}-masked vpackuswb writing zmm2, and a vmovdqa64 copy into zmm0.
484 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
485 ; AVX512BW-LABEL: test_mask_packus_epi16_rrk_512:
487 ; AVX512BW-NEXT: kmovq %rdi, %k1
488 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
489 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
490 ; AVX512BW-NEXT: retq
492 ; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512:
493 ; AVX512F-32: # BB#0:
494 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
495 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
496 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
497 ; AVX512F-32-NEXT: retl
498 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
499 %2 = bitcast i64 %mask to <64 x i1>
500 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
; Register-register, zero-masked: the i64 %mask is bitcast to <64 x i1> and
; selects between the packuswb result and zeroinitializer. The checks expect
; the {%k1} {z} form of vpackuswb writing zmm0 directly.
504 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
505 ; AVX512BW-LABEL: test_mask_packus_epi16_rrkz_512:
507 ; AVX512BW-NEXT: kmovq %rdi, %k1
508 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
509 ; AVX512BW-NEXT: retq
511 ; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512:
512 ; AVX512F-32: # BB#0:
513 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
514 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
515 ; AVX512F-32-NEXT: retl
516 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
517 %2 = bitcast i64 %mask to <64 x i1>
518 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
; Full-vector memory operand, unmasked: %b is loaded from %ptr_b before the
; call. The checks expect the load to appear as vpackuswb's memory operand:
; (%rdi) on the x86_64 run, (%eax) after a movl from the stack on the i386 run.
522 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
523 ; AVX512BW-LABEL: test_mask_packus_epi16_rm_512:
525 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0
526 ; AVX512BW-NEXT: retq
528 ; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512:
529 ; AVX512F-32: # BB#0:
530 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
531 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0
532 ; AVX512F-32-NEXT: retl
533 %b = load <32 x i16>, <32 x i16>* %ptr_b
534 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
; Full-vector memory operand, merge-masked: %b is loaded from %ptr_b, and the
; i64 %mask (bitcast to <64 x i1>) selects between the packuswb result and
; %passThru. The checks expect a {%k1}-masked vpackuswb with a memory operand
; writing zmm1, followed by a vmovdqa64 copy into zmm0.
538 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
539 ; AVX512BW-LABEL: test_mask_packus_epi16_rmk_512:
541 ; AVX512BW-NEXT: kmovq %rsi, %k1
542 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
543 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
544 ; AVX512BW-NEXT: retq
546 ; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512:
547 ; AVX512F-32: # BB#0:
548 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
549 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
550 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1}
551 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
552 ; AVX512F-32-NEXT: retl
553 %b = load <32 x i16>, <32 x i16>* %ptr_b
554 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
555 %2 = bitcast i64 %mask to <64 x i1>
556 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
; Full-vector memory operand, zero-masked: %b is loaded from %ptr_b, and the
; i64 %mask (bitcast to <64 x i1>) selects between the packuswb result and
; zeroinitializer. The checks expect the {%k1} {z} form of vpackuswb with a
; memory operand, writing zmm0 directly.
560 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
561 ; AVX512BW-LABEL: test_mask_packus_epi16_rmkz_512:
563 ; AVX512BW-NEXT: kmovq %rsi, %k1
564 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
565 ; AVX512BW-NEXT: retq
567 ; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512:
568 ; AVX512F-32: # BB#0:
569 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
570 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
571 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z}
572 ; AVX512F-32-NEXT: retl
573 %b = load <32 x i16>, <32 x i16>* %ptr_b
574 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
575 %2 = bitcast i64 %mask to <64 x i1>
576 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
580 declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)
; Register-register, unmasked: calls the masked padds.w intrinsic with a
; zeroinitializer passthru and an all-ones mask (i32 -1). The checks expect a
; plain vpaddsw with no mask register.
582 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
583 ; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:
585 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
586 ; AVX512BW-NEXT: retq
588 ; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512:
589 ; AVX512F-32: # BB#0:
590 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
591 ; AVX512F-32-NEXT: retl
592 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register, merge-masked: %passThru and %mask are forwarded to the
; masked padds.w intrinsic. The checks expect a {%k1}-masked vpaddsw writing
; zmm2 followed by a vmovdqa64 copy into zmm0; the mask is moved from %edi on
; the x86_64 run and from a stack slot on the i386 run.
596 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
597 ; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
599 ; AVX512BW-NEXT: kmovd %edi, %k1
600 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
601 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
602 ; AVX512BW-NEXT: retq
604 ; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
605 ; AVX512F-32: # BB#0:
606 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
607 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
608 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
609 ; AVX512F-32-NEXT: retl
610 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-register, zero-masked: the masked padds.w intrinsic is called with
; a zeroinitializer passthru and the caller-supplied %mask. The checks expect
; the {%k1} {z} form of vpaddsw writing zmm0 directly.
614 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
615 ; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
617 ; AVX512BW-NEXT: kmovd %edi, %k1
618 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
619 ; AVX512BW-NEXT: retq
621 ; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
622 ; AVX512F-32: # BB#0:
623 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
624 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
625 ; AVX512F-32-NEXT: retl
626 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Memory operand, unmasked: %b is loaded from %ptr_b and the masked padds.w
; intrinsic is called with a zeroinitializer passthru and an all-ones mask
; (i32 -1). The checks expect a plain vpaddsw whose first operand is the
; memory location ((%rdi) on the x86_64 run, (%eax) on the i386 run).
630 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
631 ; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
633 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0
634 ; AVX512BW-NEXT: retq
636 ; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
637 ; AVX512F-32: # BB#0:
638 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
639 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0
640 ; AVX512F-32-NEXT: retl
641 %b = load <32 x i16>, <32 x i16>* %ptr_b
642 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Memory operand, merge-masked: %b is loaded from %ptr_b, and %passThru and
; %mask are forwarded to the masked padds.w intrinsic. The checks expect a
; {%k1}-masked vpaddsw with a memory operand writing zmm1, followed by a
; vmovdqa64 copy into zmm0.
646 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
647 ; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
649 ; AVX512BW-NEXT: kmovd %esi, %k1
650 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
651 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
652 ; AVX512BW-NEXT: retq
654 ; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
655 ; AVX512F-32: # BB#0:
656 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
657 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
658 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1}
659 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
660 ; AVX512F-32-NEXT: retl
661 %b = load <32 x i16>, <32 x i16>* %ptr_b
662 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Memory operand, zero-masked: %b is loaded from %ptr_b and the masked padds.w
; intrinsic is called with a zeroinitializer passthru and the caller-supplied
; %mask. The checks expect the {%k1} {z} form of vpaddsw with a memory
; operand, writing zmm0 directly.
666 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
667 ; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
669 ; AVX512BW-NEXT: kmovd %esi, %k1
670 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
671 ; AVX512BW-NEXT: retq
673 ; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
674 ; AVX512F-32: # BB#0:
675 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
676 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
677 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
678 ; AVX512F-32-NEXT: retl
679 %b = load <32 x i16>, <32 x i16>* %ptr_b
680 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
684 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Register-register, unmasked: calls the masked psubs.w intrinsic with a
; zeroinitializer passthru and an all-ones mask (i32 -1). The checks expect a
; plain vpsubsw with no mask register.
686 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
687 ; AVX512BW-LABEL: test_mask_subs_epi16_rr_512:
689 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
690 ; AVX512BW-NEXT: retq
692 ; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512:
693 ; AVX512F-32: # BB#0:
694 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
695 ; AVX512F-32-NEXT: retl
696 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register, merge-masked: %passThru and %mask are forwarded to the
; masked psubs.w intrinsic. The checks expect a {%k1}-masked vpsubsw writing
; zmm2 followed by a vmovdqa64 copy into zmm0; the mask is moved from %edi on
; the x86_64 run and from a stack slot on the i386 run.
700 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
701 ; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
703 ; AVX512BW-NEXT: kmovd %edi, %k1
704 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
705 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
706 ; AVX512BW-NEXT: retq
708 ; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
709 ; AVX512F-32: # BB#0:
710 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
711 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
712 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
713 ; AVX512F-32-NEXT: retl
714 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-register, zero-masked: the masked psubs.w intrinsic is called with
; a zeroinitializer passthru and the caller-supplied %mask. The checks expect
; the {%k1} {z} form of vpsubsw writing zmm0 directly.
718 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
719 ; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
721 ; AVX512BW-NEXT: kmovd %edi, %k1
722 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
723 ; AVX512BW-NEXT: retq
725 ; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
726 ; AVX512F-32: # BB#0:
727 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
728 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
729 ; AVX512F-32-NEXT: retl
730 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Memory operand, unmasked: %b is loaded from %ptr_b and the masked psubs.w
; intrinsic is called with a zeroinitializer passthru and an all-ones mask
; (i32 -1). The checks expect a plain vpsubsw whose first operand is the
; memory location ((%rdi) on the x86_64 run, (%eax) on the i386 run).
734 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
735 ; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
737 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0
738 ; AVX512BW-NEXT: retq
740 ; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
741 ; AVX512F-32: # BB#0:
742 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
743 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0
744 ; AVX512F-32-NEXT: retl
745 %b = load <32 x i16>, <32 x i16>* %ptr_b
746 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Memory operand, merge-masked: %b is loaded from %ptr_b, and %passThru and
; %mask are forwarded to the masked psubs.w intrinsic. The checks expect a
; {%k1}-masked vpsubsw with a memory operand writing zmm1, followed by a
; vmovdqa64 copy into zmm0.
750 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
751 ; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
753 ; AVX512BW-NEXT: kmovd %esi, %k1
754 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
755 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
756 ; AVX512BW-NEXT: retq
758 ; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
759 ; AVX512F-32: # BB#0:
760 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
761 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
762 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
763 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
764 ; AVX512F-32-NEXT: retl
765 %b = load <32 x i16>, <32 x i16>* %ptr_b
766 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Memory operand, zero-masked: %b is loaded from %ptr_b and the masked psubs.w
; intrinsic is called with a zeroinitializer passthru and the caller-supplied
; %mask. The checks expect the {%k1} {z} form of vpsubsw with a memory
; operand, writing zmm0 directly.
770 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
771 ; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
773 ; AVX512BW-NEXT: kmovd %esi, %k1
774 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
775 ; AVX512BW-NEXT: retq
777 ; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
778 ; AVX512F-32: # BB#0:
779 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
780 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
781 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
782 ; AVX512F-32-NEXT: retl
783 %b = load <32 x i16>, <32 x i16>* %ptr_b
784 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
788 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Register-register, unmasked: calls the masked paddus.w intrinsic with a
; zeroinitializer passthru and an all-ones mask (i32 -1). The checks expect a
; plain vpaddusw with no mask register.
790 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
791 ; AVX512BW-LABEL: test_mask_adds_epu16_rr_512:
793 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
794 ; AVX512BW-NEXT: retq
796 ; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512:
797 ; AVX512F-32: # BB#0:
798 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
799 ; AVX512F-32-NEXT: retl
800 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register, merge-masked: %passThru and %mask are forwarded to the
; masked paddus.w intrinsic. The checks expect a {%k1}-masked vpaddusw writing
; zmm2 followed by a vmovdqa64 copy into zmm0; the mask is moved from %edi on
; the x86_64 run and from a stack slot on the i386 run.
804 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
805 ; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
807 ; AVX512BW-NEXT: kmovd %edi, %k1
808 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
809 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
810 ; AVX512BW-NEXT: retq
812 ; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
813 ; AVX512F-32: # BB#0:
814 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
815 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
816 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
817 ; AVX512F-32-NEXT: retl
818 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-register, zero-masked: the masked paddus.w intrinsic is called with
; a zeroinitializer passthru and the caller-supplied %mask. The checks expect
; the {%k1} {z} form of vpaddusw writing zmm0 directly.
822 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
823 ; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
825 ; AVX512BW-NEXT: kmovd %edi, %k1
826 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
827 ; AVX512BW-NEXT: retq
829 ; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
830 ; AVX512F-32: # BB#0:
831 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
832 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
833 ; AVX512F-32-NEXT: retl
834 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; paddus.w.512, register-memory form, all-ones mask. The second operand is
; loaded from %ptr_b; the expected code folds that load into the memory
; operand of an unmasked vpaddusw. The 32-bit target first loads the pointer
; from the stack into %eax.
838 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
839 ; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
841 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0
842 ; AVX512BW-NEXT: retq
844 ; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
845 ; AVX512F-32: # BB#0:
846 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
847 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0
848 ; AVX512F-32-NEXT: retl
849 %b = load <32 x i16>, <32 x i16>* %ptr_b
850 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; paddus.w.512, register-memory form, variable mask with an explicit
; %passThru. Expected code is a merge-masked vpaddusw with a folded memory
; operand writing %zmm1 (presumably the register holding %passThru), followed
; by a copy of %zmm1 to %zmm0.
854 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
855 ; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
857 ; AVX512BW-NEXT: kmovd %esi, %k1
858 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
859 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
860 ; AVX512BW-NEXT: retq
862 ; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
863 ; AVX512F-32: # BB#0:
864 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
865 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
866 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
867 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
868 ; AVX512F-32-NEXT: retl
869 %b = load <32 x i16>, <32 x i16>* %ptr_b
870 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; paddus.w.512, register-memory form, variable mask with a zeroinitializer
; pass-through. Expected code is a zero-masked vpaddusw ({%k1} {z}) with the
; load from %ptr_b folded into its memory operand.
874 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
875 ; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
877 ; AVX512BW-NEXT: kmovd %esi, %k1
878 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
879 ; AVX512BW-NEXT: retq
881 ; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
882 ; AVX512F-32: # BB#0:
883 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
884 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
885 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
886 ; AVX512F-32-NEXT: retl
887 %b = load <32 x i16>, <32 x i16>* %ptr_b
888 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Declaration of the masked paddus.w.512 intrinsic: three <32 x i16> operands
; followed by an i32 mask. The test_mask_adds_epu16_* functions call it with
; (%a, %b, pass-through value, mask).
892 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; psubus.w.512, register-register form, all-ones mask (i32 -1) with a zero
; pass-through. Both targets are expected to emit one unmasked vpsubusw.
894 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
895 ; AVX512BW-LABEL: test_mask_subs_epu16_rr_512:
897 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
898 ; AVX512BW-NEXT: retq
900 ; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512:
901 ; AVX512F-32: # BB#0:
902 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
903 ; AVX512F-32-NEXT: retl
904 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; psubus.w.512, register-register form, variable mask with an explicit
; %passThru. Expected code is a merge-masked vpsubusw into %zmm2 (presumably
; the register holding %passThru), then a copy of %zmm2 to %zmm0.
908 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
909 ; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
911 ; AVX512BW-NEXT: kmovd %edi, %k1
912 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
913 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
914 ; AVX512BW-NEXT: retq
916 ; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
917 ; AVX512F-32: # BB#0:
918 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
919 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
920 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
921 ; AVX512F-32-NEXT: retl
922 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; psubus.w.512, register-register form, variable mask with a zeroinitializer
; pass-through. Expected code uses the zero-masking form ({%k1} {z}) of
; vpsubusw writing directly to %zmm0.
926 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
927 ; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
929 ; AVX512BW-NEXT: kmovd %edi, %k1
930 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
931 ; AVX512BW-NEXT: retq
933 ; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
934 ; AVX512F-32: # BB#0:
935 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
936 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
937 ; AVX512F-32-NEXT: retl
938 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; psubus.w.512, register-memory form, all-ones mask. The second operand is
; loaded from %ptr_b and the expected code folds the load into an unmasked
; vpsubusw; the 32-bit target first loads the pointer from the stack.
942 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
943 ; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
945 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0
946 ; AVX512BW-NEXT: retq
948 ; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
949 ; AVX512F-32: # BB#0:
950 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
951 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0
952 ; AVX512F-32-NEXT: retl
953 %b = load <32 x i16>, <32 x i16>* %ptr_b
954 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; psubus.w.512, register-memory form, variable mask with an explicit
; %passThru. Expected code is a merge-masked vpsubusw with a folded memory
; operand writing %zmm1 (presumably the register holding %passThru), followed
; by a copy of %zmm1 to %zmm0.
958 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
959 ; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
961 ; AVX512BW-NEXT: kmovd %esi, %k1
962 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
963 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
964 ; AVX512BW-NEXT: retq
966 ; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
967 ; AVX512F-32: # BB#0:
968 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
969 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
970 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
971 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
972 ; AVX512F-32-NEXT: retl
973 %b = load <32 x i16>, <32 x i16>* %ptr_b
974 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; psubus.w.512, register-memory form, variable mask with a zeroinitializer
; pass-through. Expected code is a zero-masked vpsubusw ({%k1} {z}) with the
; load from %ptr_b folded into its memory operand.
978 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
979 ; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
981 ; AVX512BW-NEXT: kmovd %esi, %k1
982 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
983 ; AVX512BW-NEXT: retq
985 ; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
986 ; AVX512F-32: # BB#0:
987 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
988 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
989 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
990 ; AVX512F-32-NEXT: retl
991 %b = load <32 x i16>, <32 x i16>* %ptr_b
992 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Declaration of the masked psubus.w.512 intrinsic: three <32 x i16> operands
; followed by an i32 mask. The test_mask_subs_epu16_* functions call it with
; (%a, %b, pass-through value, mask).
996 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; mask.vpermt2var.hi.512 is called twice on the same (%x0, %x1, %x2): once
; with %x3 as the mask and once with an all-ones mask. The two results are
; added, so the expected code holds a masked vpermt2w (on a copy of %zmm1 in
; %zmm3), an unmasked vpermt2w, and a final vpaddw.
998 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1000 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1001 ; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
1002 ; AVX512BW: ## BB#0:
1003 ; AVX512BW-NEXT: kmovd %edi, %k1
1004 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
1005 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
1006 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1007 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1008 ; AVX512BW-NEXT: retq
1010 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
1011 ; AVX512F-32: # BB#0:
1012 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1013 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
1014 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
1015 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1016 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1017 ; AVX512F-32-NEXT: retl
1018 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1019 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1020 %res2 = add <32 x i16> %res, %res1
1021 ret <32 x i16> %res2
; Zero-masking variant of the vpermt2var.hi.512 test: the maskz intrinsic is
; called with %x3 and with an all-ones mask, and the results are added. The
; expected code has a {%k1} {z} vpermt2w, an unmasked vpermt2w and a vpaddw.
1024 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1026 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1027 ; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
1028 ; AVX512BW: ## BB#0:
1029 ; AVX512BW-NEXT: kmovd %edi, %k1
1030 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
1031 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
1032 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1033 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1034 ; AVX512BW-NEXT: retq
1036 ; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
1037 ; AVX512F-32: # BB#0:
1038 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1039 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
1040 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
1041 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1042 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1043 ; AVX512F-32-NEXT: retl
1044 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1045 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1046 %res2 = add <32 x i16> %res, %res1
1047 ret <32 x i16> %res2
; mask.vpermi2var.hi.512 is called with %x3 as the mask and again with an
; all-ones mask; the results are added. The expected code has a merge-masked
; vpermi2w on a copy of %zmm1, an unmasked vpermi2w, and a vpaddw.
1050 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1052 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1053 ; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
1054 ; AVX512BW: ## BB#0:
1055 ; AVX512BW-NEXT: kmovd %edi, %k1
1056 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
1057 ; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1058 ; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
1059 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1060 ; AVX512BW-NEXT: retq
1062 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
1063 ; AVX512F-32: # BB#0:
1064 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1065 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
1066 ; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1067 ; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
1068 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1069 ; AVX512F-32-NEXT: retl
1070 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1071 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1072 %res2 = add <32 x i16> %res, %res1
1073 ret <32 x i16> %res2
; Unmasked pshuf.b.512: the intrinsic takes only the two <64 x i8> sources (no
; mask or pass-through operand), and both targets are expected to emit a
; single vpshufb on %zmm0/%zmm1.
1076 declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
1078 define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
1079 ; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512:
1080 ; AVX512BW: ## BB#0:
1081 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
1082 ; AVX512BW-NEXT: retq
1084 ; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512:
1085 ; AVX512F-32: # BB#0:
1086 ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
1087 ; AVX512F-32-NEXT: retl
1088 %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
; Merge-masked pshuf.b.512 expressed in generic IR: the i64 %mask is bitcast
; to <64 x i1> and a select picks between the intrinsic result and %x2. The
; expected code folds the select into a {%k1} vpshufb; the 64-bit mask is
; moved with kmovq (from %rdi on the 64-bit target, from the stack on the
; 32-bit target).
1092 define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
1093 ; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
1094 ; AVX512BW: ## BB#0:
1095 ; AVX512BW-NEXT: kmovq %rdi, %k1
1096 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
1097 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
1098 ; AVX512BW-NEXT: retq
1100 ; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
1101 ; AVX512F-32: # BB#0:
1102 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
1103 ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
1104 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
1105 ; AVX512F-32-NEXT: retl
1106 %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
1107 %mask.cast = bitcast i64 %mask to <64 x i1>
1108 %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
; Zero-masked pshuf.b.512 expressed in generic IR: a select on the bitcast
; i64 mask picks between the intrinsic result and zeroinitializer. The
; expected code folds this into a {%k1} {z} vpshufb.
1112 define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
1113 ; AVX512BW-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
1114 ; AVX512BW: ## BB#0:
1115 ; AVX512BW-NEXT: kmovq %rdi, %k1
1116 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
1117 ; AVX512BW-NEXT: retq
1119 ; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
1120 ; AVX512F-32: # BB#0:
1121 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
1122 ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z}
1123 ; AVX512F-32-NEXT: retl
1124 %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
1125 %mask.cast = bitcast i64 %mask to <64 x i1>
1126 %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
; mask.pmulhu.w.512 is called with %x3 as the mask and again with an all-ones
; mask, both with %x2 as pass-through; the results are added. The expected
; code has a merge-masked vpmulhuw into %zmm2, an unmasked vpmulhuw, and a
; vpaddw.
1130 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1132 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1133 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
1134 ; AVX512BW: ## BB#0:
1135 ; AVX512BW-NEXT: kmovd %edi, %k1
1136 ; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
1137 ; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
1138 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1139 ; AVX512BW-NEXT: retq
1141 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
1142 ; AVX512F-32: # BB#0:
1143 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1144 ; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
1145 ; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
1146 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1147 ; AVX512F-32-NEXT: retl
1148 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1149 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1150 %res2 = add <32 x i16> %res, %res1
1151 ret <32 x i16> %res2
; mask.pmulh.w.512 is called with %x3 as the mask and again with an all-ones
; mask, both with %x2 as pass-through; the results are added. The expected
; code has a merge-masked vpmulhw into %zmm2, an unmasked vpmulhw, and a
; vpaddw.
1154 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1156 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1157 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
1158 ; AVX512BW: ## BB#0:
1159 ; AVX512BW-NEXT: kmovd %edi, %k1
1160 ; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
1161 ; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
1162 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1163 ; AVX512BW-NEXT: retq
1165 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
1166 ; AVX512F-32: # BB#0:
1167 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1168 ; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
1169 ; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
1170 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1171 ; AVX512F-32-NEXT: retl
1172 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1173 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1174 %res2 = add <32 x i16> %res, %res1
1175 ret <32 x i16> %res2
; mask.pmul.hr.sw.512 is called with %x3 as the mask and again with an
; all-ones mask, both with %x2 as pass-through; the results are added. The
; expected code has a merge-masked vpmulhrsw into %zmm2, an unmasked
; vpmulhrsw, and a vpaddw.
1178 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1180 define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1181 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
1182 ; AVX512BW: ## BB#0:
1183 ; AVX512BW-NEXT: kmovd %edi, %k1
1184 ; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
1185 ; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
1186 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1187 ; AVX512BW-NEXT: retq
1189 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
1190 ; AVX512F-32: # BB#0:
1191 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1192 ; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
1193 ; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
1194 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1195 ; AVX512F-32-NEXT: retl
1196 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1197 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1198 %res2 = add <32 x i16> %res, %res1
1199 ret <32 x i16> %res2
; mask.pmov.wb.512 takes a <32 x i16> source, a <32 x i8> pass-through and an
; i32 mask, and returns <32 x i8>. It is called three times: all-ones mask,
; %x2 mask with %x1 pass-through, and %x2 mask with a zero pass-through. The
; results are summed, so the expected code has the unmasked, merge-masked and
; zero-masked forms of vpmovwb plus two vpaddb.
1202 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
1204 define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1205 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
1206 ; AVX512BW: ## BB#0:
1207 ; AVX512BW-NEXT: kmovd %edi, %k1
1208 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
1209 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
1210 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1211 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1212 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1213 ; AVX512BW-NEXT: retq
1215 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
1216 ; AVX512F-32: # BB#0:
1217 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1218 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
1219 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
1220 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm0
1221 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1222 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1223 ; AVX512F-32-NEXT: retl
1224 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1225 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1226 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1227 %res3 = add <32 x i8> %res0, %res1
1228 %res4 = add <32 x i8> %res3, %res2
; Store form of pmov.wb: the intrinsic returns void and takes a destination
; pointer, a <32 x i16> source and an i32 mask. It is called first with an
; all-ones mask and then with %x2. The expected code has an unmasked vpmovwb
; store followed by a {%k1}-masked store to the same address.
1232 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1234 define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1235 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
1236 ; AVX512BW: ## BB#0:
1237 ; AVX512BW-NEXT: kmovd %esi, %k1
1238 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi)
1239 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
1240 ; AVX512BW-NEXT: retq
1242 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
1243 ; AVX512F-32: # BB#0:
1244 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1245 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1246 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax)
1247 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) {%k1}
1248 ; AVX512F-32-NEXT: retl
1249 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1250 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
; mask.pmovs.wb.512 is called three times: all-ones mask, %x2 mask with %x1
; pass-through, and %x2 mask with a zero pass-through. The results are
; summed, so the expected code has the unmasked, merge-masked and zero-masked
; forms of vpmovswb plus two vpaddb.
1254 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
1256 define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1257 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
1258 ; AVX512BW: ## BB#0:
1259 ; AVX512BW-NEXT: kmovd %edi, %k1
1260 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
1261 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
1262 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0
1263 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1264 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1265 ; AVX512BW-NEXT: retq
1267 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
1268 ; AVX512F-32: # BB#0:
1269 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1270 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
1271 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
1272 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm0
1273 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1274 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1275 ; AVX512F-32-NEXT: retl
1276 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1277 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1278 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1279 %res3 = add <32 x i8> %res0, %res1
1280 %res4 = add <32 x i8> %res3, %res2
; Store form of pmovs.wb: the intrinsic is called with an all-ones mask and
; then with %x2, both storing to %ptr. The expected code has an unmasked
; vpmovswb store followed by a {%k1}-masked store to the same address.
1284 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1286 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1287 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
1288 ; AVX512BW: ## BB#0:
1289 ; AVX512BW-NEXT: kmovd %esi, %k1
1290 ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi)
1291 ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1}
1292 ; AVX512BW-NEXT: retq
1294 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
1295 ; AVX512F-32: # BB#0:
1296 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1297 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1298 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax)
1299 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax) {%k1}
1300 ; AVX512F-32-NEXT: retl
1301 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1302 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
; mask.pmovus.wb.512 is called three times: all-ones mask, %x2 mask with %x1
; pass-through, and %x2 mask with a zero pass-through. The results are
; summed, so the expected code has the unmasked, merge-masked and zero-masked
; forms of vpmovuswb plus two vpaddb.
1306 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
1308 define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1309 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
1310 ; AVX512BW: ## BB#0:
1311 ; AVX512BW-NEXT: kmovd %edi, %k1
1312 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
1313 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
1314 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
1315 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1316 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1317 ; AVX512BW-NEXT: retq
1319 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
1320 ; AVX512F-32: # BB#0:
1321 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1322 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
1323 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
1324 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm0
1325 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1326 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1327 ; AVX512F-32-NEXT: retl
1328 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1329 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1330 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1331 %res3 = add <32 x i8> %res0, %res1
1332 %res4 = add <32 x i8> %res3, %res2
; Store form of pmovus.wb: the intrinsic is called with an all-ones mask and
; then with %x2, both storing to %ptr. The expected code has an unmasked
; vpmovuswb store followed by a {%k1}-masked store to the same address.
1336 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1338 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1339 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
1340 ; AVX512BW: ## BB#0:
1341 ; AVX512BW-NEXT: kmovd %esi, %k1
1342 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi)
1343 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1}
1344 ; AVX512BW-NEXT: retq
1346 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
1347 ; AVX512F-32: # BB#0:
1348 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1349 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1350 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax)
1351 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax) {%k1}
1352 ; AVX512F-32-NEXT: retl
1353 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1354 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
; mask.pmaddubs.w.512 takes two <64 x i8> sources and produces <32 x i16>,
; with a <32 x i16> pass-through and an i32 mask. It is called with %x3 as
; the mask and again with an all-ones mask, and the results are added. The
; expected code has a merge-masked vpmaddubsw, an unmasked vpmaddubsw, and a
; vpaddw.
1358 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
1360 define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
1361 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
1362 ; AVX512BW: ## BB#0:
1363 ; AVX512BW-NEXT: kmovd %edi, %k1
1364 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
1365 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
1366 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1367 ; AVX512BW-NEXT: retq
1369 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
1370 ; AVX512F-32: # BB#0:
1371 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1372 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
1373 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
1374 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1375 ; AVX512F-32-NEXT: retl
1376 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
1377 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
1378 %res2 = add <32 x i16> %res, %res1
1379 ret <32 x i16> %res2
; mask.pmaddw.d.512 takes two <32 x i16> sources and produces <16 x i32>, so
; its mask is i16 rather than i32. It is called with %x3 as the mask and
; again with an all-ones mask, and the results are added with vpaddd.
; The expected mask move differs per target: kmovd from %edi on the 64-bit
; target, kmovw from the stack on the 32-bit target.
; NOTE(review): these lines are autogenerated; if the mnemonic difference
; looks suspicious, regenerate with update_llc_test_checks.py rather than
; hand-editing.
1382 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
1384 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
1385 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
1386 ; AVX512BW: ## BB#0:
1387 ; AVX512BW-NEXT: kmovd %edi, %k1
1388 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
1389 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
1390 ; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0
1391 ; AVX512BW-NEXT: retq
1393 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
1394 ; AVX512F-32: # BB#0:
1395 ; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1396 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
1397 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
1398 ; AVX512F-32-NEXT: vpaddd %zmm0, %zmm2, %zmm0
1399 ; AVX512F-32-NEXT: retl
1400 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
1401 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
1402 %res2 = add <16 x i32> %res, %res1
1403 ret <16 x i32> %res2
; mask.dbpsadbw.512 takes two <64 x i8> sources, an i32 immediate, a
; <32 x i16> pass-through and an i32 mask. All three calls use the immediate
; 2: %x4 mask with %x3 pass-through, %x4 mask with a zero pass-through, and
; an all-ones mask. The results are summed, so the expected code has the
; merge-masked, zero-masked and unmasked forms of "vdbpsadbw $2" plus two
; vpaddw.
1406 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
1408 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
1409 ; AVX512BW-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
1410 ; AVX512BW: ## BB#0:
1411 ; AVX512BW-NEXT: kmovd %edi, %k1
1412 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
1413 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
1414 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm2
1415 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
1416 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1417 ; AVX512BW-NEXT: retq
1419 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
1420 ; AVX512F-32: # BB#0:
1421 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1422 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
1423 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
1424 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm2
1425 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
1426 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1427 ; AVX512F-32-NEXT: retl
1428 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
1429 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
1430 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
1431 %res3 = add <32 x i16> %res, %res1
1432 %res4 = add <32 x i16> %res3, %res2
1433 ret <32 x i16> %res4
; psad.bw.512 takes two <64 x i8> sources and returns <8 x i64>. Despite the
; "mask" in the test name, neither the intrinsic nor the test has a mask
; operand. The intrinsic is called on (%x0, %x1) and on (%x0, %x2) and the
; results are added, so the expected code has two vpsadbw and one vpaddq.
1436 declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
1438 define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
1439 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512:
1440 ; AVX512BW: ## BB#0:
1441 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
1442 ; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
1443 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
1444 ; AVX512BW-NEXT: retq
1446 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512:
1447 ; AVX512F-32: # BB#0:
1448 ; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
1449 ; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
1450 ; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
1451 ; AVX512F-32-NEXT: retl
1452 %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
1453 %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
1454 %res2 = add <8 x i64> %res, %res1
; kunpck.wd takes two i32 values and returns an i32. The expected code moves
; both arguments into mask registers, combines them with kunpckwd and moves
; the result to %eax.
; The two targets differ: the 64-bit one loads with kmovd and uses the operand
; order (%k1, %k0), the 32-bit one loads with kmovw from the stack and uses
; (%k0, %k1).
; NOTE(review): the assertions are autogenerated, so the operand-order
; difference reflects what llc emitted; confirm by regenerating before
; changing anything here.
1458 declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
1460 define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
1461 ; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
1462 ; AVX512BW: ## BB#0:
1463 ; AVX512BW-NEXT: kmovd %edi, %k0
1464 ; AVX512BW-NEXT: kmovd %esi, %k1
1465 ; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0
1466 ; AVX512BW-NEXT: kmovd %k0, %eax
1467 ; AVX512BW-NEXT: retq
1469 ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
1470 ; AVX512F-32: # BB#0:
1471 ; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k0
1472 ; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1473 ; AVX512F-32-NEXT: kunpckwd %k0, %k1, %k0
1474 ; AVX512F-32-NEXT: kmovd %k0, %eax
1475 ; AVX512F-32-NEXT: retl
1476 %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
; kunpck.dq takes two i64 values and returns an i64.
; NOTE(review): the test function is spelled "kunpck_qd" while the intrinsic
; is "kunpck.dq"; the LABEL lines depend on the function name, so it is left
; as is.
; The 64-bit target is expected to use kmovq from %rdi/%rsi, kunpckdq and
; kmovq to %rax. On the 32-bit target each mask is loaded with kmovd from the
; stack, and the 64-bit result is spilled with kmovq to a 12-byte stack area
; and reloaded into %eax/%edx.
1480 declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
1482 define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
1483 ; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
1484 ; AVX512BW: ## BB#0:
1485 ; AVX512BW-NEXT: kmovq %rdi, %k0
1486 ; AVX512BW-NEXT: kmovq %rsi, %k1
1487 ; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0
1488 ; AVX512BW-NEXT: kmovq %k0, %rax
1489 ; AVX512BW-NEXT: retq
1491 ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
1492 ; AVX512F-32: # BB#0:
1493 ; AVX512F-32-NEXT: subl $12, %esp
1494 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
1495 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1496 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1497 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
1498 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
1499 ; AVX512F-32-NEXT: movl (%esp), %eax
1500 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
1501 ; AVX512F-32-NEXT: addl $12, %esp
1502 ; AVX512F-32-NEXT: retl
1503 %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
; cvtb2mask.512 converts a <64 x i8> vector to an i64 mask value. The
; expected code is vpmovb2m into %k0. The 64-bit target then moves %k0
; straight to %rax; the 32-bit target stores it with kmovq to a 12-byte
; stack area and reloads the two halves into %eax and %edx.
1507 declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
1509 define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
1510 ; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512:
1511 ; AVX512BW: ## BB#0:
1512 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
1513 ; AVX512BW-NEXT: kmovq %k0, %rax
1514 ; AVX512BW-NEXT: retq
1516 ; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
1517 ; AVX512F-32: # BB#0:
1518 ; AVX512F-32-NEXT: subl $12, %esp
1519 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
1520 ; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
1521 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
1522 ; AVX512F-32-NEXT: movl (%esp), %eax
1523 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
1524 ; AVX512F-32-NEXT: addl $12, %esp
1525 ; AVX512F-32-NEXT: retl
1526 %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
; cvtw2mask.512 converts a <32 x i16> vector to an i32 mask value. Both
; targets are expected to emit vpmovw2m into %k0 followed by kmovd to %eax;
; the 32-bit result fits one register, so no stack traffic is expected.
1530 declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
1532 define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
1533 ; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512:
1534 ; AVX512BW: ## BB#0:
1535 ; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
1536 ; AVX512BW-NEXT: kmovd %k0, %eax
1537 ; AVX512BW-NEXT: retq
1539 ; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512:
1540 ; AVX512F-32: # BB#0:
1541 ; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0
1542 ; AVX512F-32-NEXT: kmovd %k0, %eax
1543 ; AVX512F-32-NEXT: retl
1544 %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
; mask.psrlv32hi is called three times on (%x0, %x1): %x3 mask with %x2
; pass-through, %x3 mask with a zero pass-through, and an all-ones mask. The
; results are summed. In the expected code the unmasked vpsrlvw is emitted
; first (into %zmm3), then the merge-masked and zero-masked forms, then two
; vpaddw.
1548 declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1550 define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1551 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psrlv32hi:
1552 ; AVX512BW: ## BB#0:
1553 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3
1554 ; AVX512BW-NEXT: kmovd %edi, %k1
1555 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
1556 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z}
1557 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1558 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
1559 ; AVX512BW-NEXT: retq
1561 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi:
1562 ; AVX512F-32: # BB#0:
1563 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3
1564 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1565 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
1566 ; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z}
1567 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1568 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
1569 ; AVX512F-32-NEXT: retl
1570 %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1571 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1572 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1573 %res3 = add <32 x i16> %res, %res1
1574 %res4 = add <32 x i16> %res3, %res2
1575 ret <32 x i16> %res4
1578 declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Exercises llvm.x86.avx512.mask.psrav32.hi three ways on the same operands:
; merge-masked (%x2 passthru, mask %x3), zero-masked (zeroinitializer passthru,
; mask %x3) and with an all-ones mask (-1). The three results are added, so the
; assertions expect three vpsravw forms (plain, {%k1}, and {%k1} {z}) followed
; by two vpaddw. The 64-bit configuration takes the mask from %edi, the 32-bit
; configuration loads it from the stack.
1580 define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1581 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi:
1582 ; AVX512BW: ## BB#0:
1583 ; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm3
1584 ; AVX512BW-NEXT: kmovd %edi, %k1
1585 ; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
1586 ; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z}
1587 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1588 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
1589 ; AVX512BW-NEXT: retq
1591 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi:
1592 ; AVX512F-32: # BB#0:
1593 ; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3
1594 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1595 ; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
1596 ; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z}
1597 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1598 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
1599 ; AVX512F-32-NEXT: retl
1600 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1601 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1602 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1603 %res3 = add <32 x i16> %res, %res1
1604 %res4 = add <32 x i16> %res3, %res2
1605 ret <32 x i16> %res4
; Exercises llvm.x86.avx512.mask.psrav32.hi with two constant vector operands,
; a zeroinitializer passthru and an all-ones mask (-1); the function's own
; parameters %x0..%x3 are not referenced by the call. The assertions expect the
; first constant to be materialized with vmovdqa64 and the shift amounts to be
; taken from a memory operand of vpsravw, i.e. the shift is not folded away.
; Negative i16 elements of the first constant (e.g. -12) appear in the expected
; output as their unsigned 16-bit values (e.g. 65524).
1608 define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1609 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
1610 ; AVX512BW: ## BB#0:
1611 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
1612 ; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
1613 ; AVX512BW-NEXT: retq
1615 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
1616 ; AVX512F-32: # BB#0:
1617 ; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
1618 ; AVX512F-32-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0
1619 ; AVX512F-32-NEXT: retl
1620 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>,
1621 <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>,
1622 <32 x i16> zeroinitializer, i32 -1)
1626 declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Exercises llvm.x86.avx512.mask.psllv32hi three ways on the same operands:
; merge-masked (%x2 passthru, mask %x3), zero-masked (zeroinitializer passthru,
; mask %x3) and with an all-ones mask (-1). The three results are added, so the
; assertions expect three vpsllvw forms (plain, {%k1}, and {%k1} {z}) followed
; by two vpaddw. The 64-bit configuration takes the mask from %edi, the 32-bit
; configuration loads it from the stack.
1628 define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1629 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psllv32hi:
1630 ; AVX512BW: ## BB#0:
1631 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm3
1632 ; AVX512BW-NEXT: kmovd %edi, %k1
1633 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
1634 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z}
1635 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1636 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
1637 ; AVX512BW-NEXT: retq
1639 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi:
1640 ; AVX512F-32: # BB#0:
1641 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3
1642 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1643 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
1644 ; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z}
1645 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1646 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
1647 ; AVX512F-32-NEXT: retl
1648 %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1649 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1650 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1651 %res3 = add <32 x i16> %res, %res1
1652 %res4 = add <32 x i16> %res3, %res2
1653 ret <32 x i16> %res4
1656 declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Exercises llvm.x86.avx512.mask.permvar.hi.512 three ways on the same operands:
; merge-masked (%x2 passthru, mask %x3), zero-masked (zeroinitializer passthru,
; mask %x3) and with an all-ones mask (-1). The three results are added, so the
; assertions expect a merge-masked vpermw, a zero-masked vpermw, a vpaddw, an
; unmasked vpermw and a final vpaddw, in that order. The 64-bit configuration
; takes the mask from %edi, the 32-bit configuration loads it from the stack.
1658 define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1659 ; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
1660 ; AVX512BW: ## BB#0:
1661 ; AVX512BW-NEXT: kmovd %edi, %k1
1662 ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
1663 ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
1664 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm2
1665 ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
1666 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1667 ; AVX512BW-NEXT: retq
1669 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
1670 ; AVX512F-32: # BB#0:
1671 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1672 ; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
1673 ; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
1674 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm2
1675 ; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0
1676 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1677 ; AVX512F-32-NEXT: retl
1678 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1679 %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1680 %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1681 %res3 = add <32 x i16> %res, %res1
1682 %res4 = add <32 x i16> %res3, %res2
1683 ret <32 x i16> %res4
; Unmasked llvm.x86.avx512.psll.w.512 with a <32 x i16> value and an <8 x i16>
; shift-count operand. Both configurations are expected to emit a single
; vpsllw taking the count from %xmm1.
1686 define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) {
1687 ; AVX512BW-LABEL: test_x86_avx512_psll_w_512:
1688 ; AVX512BW: ## BB#0:
1689 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
1690 ; AVX512BW-NEXT: retq
1692 ; AVX512F-32-LABEL: test_x86_avx512_psll_w_512:
1693 ; AVX512F-32: # BB#0:
1694 ; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0
1695 ; AVX512F-32-NEXT: retl
1696 %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
; Merge-masked form: the llvm.x86.avx512.psll.w.512 result is combined with
; %passthru through a select on %mask bitcast to <32 x i1>. The assertions
; expect the select to fold into a {%k1}-masked vpsllw writing %zmm2, followed
; by a vmovdqa64 copy into %zmm0. The 64-bit configuration takes the mask from
; %edi, the 32-bit configuration loads it from the stack.
1699 define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
1700 ; AVX512BW-LABEL: test_x86_avx512_mask_psll_w_512:
1701 ; AVX512BW: ## BB#0:
1702 ; AVX512BW-NEXT: kmovd %edi, %k1
1703 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
1704 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
1705 ; AVX512BW-NEXT: retq
1707 ; AVX512F-32-LABEL: test_x86_avx512_mask_psll_w_512:
1708 ; AVX512F-32: # BB#0:
1709 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1710 ; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
1711 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
1712 ; AVX512F-32-NEXT: retl
1713 %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1714 %mask.cast = bitcast i32 %mask to <32 x i1>
1715 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1716 ret <32 x i16> %res2
; Zero-masked form: the llvm.x86.avx512.psll.w.512 result is selected against
; zeroinitializer using %mask bitcast to <32 x i1>. The assertions expect the
; select to fold into a single vpsllw with {%k1} {z}. The 64-bit configuration
; takes the mask from %edi, the 32-bit configuration loads it from the stack.
1718 define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
1719 ; AVX512BW-LABEL: test_x86_avx512_maskz_psll_w_512:
1720 ; AVX512BW: ## BB#0:
1721 ; AVX512BW-NEXT: kmovd %edi, %k1
1722 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z}
1723 ; AVX512BW-NEXT: retq
1725 ; AVX512F-32-LABEL: test_x86_avx512_maskz_psll_w_512:
1726 ; AVX512F-32: # BB#0:
1727 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1728 ; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z}
1729 ; AVX512F-32-NEXT: retl
1730 %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1731 %mask.cast = bitcast i32 %mask to <32 x i1>
1732 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1733 ret <32 x i16> %res2
1735 declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone
; Unmasked llvm.x86.avx512.pslli.w.512 with the constant count 7. Both
; configurations are expected to emit a single vpsllw with the immediate $7.
1738 define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) {
1739 ; AVX512BW-LABEL: test_x86_avx512_pslli_w_512:
1740 ; AVX512BW: ## BB#0:
1741 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
1742 ; AVX512BW-NEXT: retq
1744 ; AVX512F-32-LABEL: test_x86_avx512_pslli_w_512:
1745 ; AVX512F-32: # BB#0:
1746 ; AVX512F-32-NEXT: vpsllw $7, %zmm0, %zmm0
1747 ; AVX512F-32-NEXT: retl
1748 %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
; Merge-masked form: the llvm.x86.avx512.pslli.w.512 (count 7) result is
; combined with %passthru through a select on %mask bitcast to <32 x i1>. The
; assertions expect a {%k1}-masked vpsllw $7 writing %zmm1, followed by a
; vmovdqa64 copy into %zmm0. The 64-bit configuration takes the mask from %edi,
; the 32-bit configuration loads it from the stack.
1751 define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1752 ; AVX512BW-LABEL: test_x86_avx512_mask_pslli_w_512:
1753 ; AVX512BW: ## BB#0:
1754 ; AVX512BW-NEXT: kmovd %edi, %k1
1755 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1}
1756 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
1757 ; AVX512BW-NEXT: retq
1759 ; AVX512F-32-LABEL: test_x86_avx512_mask_pslli_w_512:
1760 ; AVX512F-32: # BB#0:
1761 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1762 ; AVX512F-32-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1}
1763 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
1764 ; AVX512F-32-NEXT: retl
1765 %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1766 %mask.cast = bitcast i32 %mask to <32 x i1>
1767 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1768 ret <32 x i16> %res2
; Zero-masked form: the llvm.x86.avx512.pslli.w.512 (count 7) result is
; selected against zeroinitializer using %mask bitcast to <32 x i1>. The
; assertions expect a single vpsllw $7 with {%k1} {z}. The 64-bit configuration
; takes the mask from %edi, the 32-bit configuration loads it from the stack.
1770 define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) {
1771 ; AVX512BW-LABEL: test_x86_avx512_maskz_pslli_w_512:
1772 ; AVX512BW: ## BB#0:
1773 ; AVX512BW-NEXT: kmovd %edi, %k1
1774 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z}
1775 ; AVX512BW-NEXT: retq
1777 ; AVX512F-32-LABEL: test_x86_avx512_maskz_pslli_w_512:
1778 ; AVX512F-32: # BB#0:
1779 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1780 ; AVX512F-32-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z}
1781 ; AVX512F-32-NEXT: retl
1782 %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1783 %mask.cast = bitcast i32 %mask to <32 x i1>
1784 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1785 ret <32 x i16> %res2
1787 declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone
; Unmasked llvm.x86.avx512.psra.w.512 with a <32 x i16> value and an <8 x i16>
; shift-count operand. Both configurations are expected to emit a single
; vpsraw taking the count from %xmm1.
1790 define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) {
1791 ; AVX512BW-LABEL: test_x86_avx512_psra_w_512:
1792 ; AVX512BW: ## BB#0:
1793 ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
1794 ; AVX512BW-NEXT: retq
1796 ; AVX512F-32-LABEL: test_x86_avx512_psra_w_512:
1797 ; AVX512F-32: # BB#0:
1798 ; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0
1799 ; AVX512F-32-NEXT: retl
1800 %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
; Merge-masked form: the llvm.x86.avx512.psra.w.512 result is combined with
; %passthru through a select on %mask bitcast to <32 x i1>. The assertions
; expect the select to fold into a {%k1}-masked vpsraw writing %zmm2, followed
; by a vmovdqa64 copy into %zmm0. The 64-bit configuration takes the mask from
; %edi, the 32-bit configuration loads it from the stack.
1803 define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
1804 ; AVX512BW-LABEL: test_x86_avx512_mask_psra_w_512:
1805 ; AVX512BW: ## BB#0:
1806 ; AVX512BW-NEXT: kmovd %edi, %k1
1807 ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
1808 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
1809 ; AVX512BW-NEXT: retq
1811 ; AVX512F-32-LABEL: test_x86_avx512_mask_psra_w_512:
1812 ; AVX512F-32: # BB#0:
1813 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1814 ; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
1815 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
1816 ; AVX512F-32-NEXT: retl
1817 %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1818 %mask.cast = bitcast i32 %mask to <32 x i1>
1819 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1820 ret <32 x i16> %res2
; Zero-masked form: the llvm.x86.avx512.psra.w.512 result is selected against
; zeroinitializer using %mask bitcast to <32 x i1>. The assertions expect the
; select to fold into a single vpsraw with {%k1} {z}. The 64-bit configuration
; takes the mask from %edi, the 32-bit configuration loads it from the stack.
1822 define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
1823 ; AVX512BW-LABEL: test_x86_avx512_maskz_psra_w_512:
1824 ; AVX512BW: ## BB#0:
1825 ; AVX512BW-NEXT: kmovd %edi, %k1
1826 ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z}
1827 ; AVX512BW-NEXT: retq
1829 ; AVX512F-32-LABEL: test_x86_avx512_maskz_psra_w_512:
1830 ; AVX512F-32: # BB#0:
1831 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1832 ; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z}
1833 ; AVX512F-32-NEXT: retl
1834 %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1835 %mask.cast = bitcast i32 %mask to <32 x i1>
1836 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1837 ret <32 x i16> %res2
1839 declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) nounwind readnone
; Unmasked llvm.x86.avx512.psrai.w.512 with the constant count 7. Both
; configurations are expected to emit a single vpsraw with the immediate $7.
1842 define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) {
1843 ; AVX512BW-LABEL: test_x86_avx512_psrai_w_512:
1844 ; AVX512BW: ## BB#0:
1845 ; AVX512BW-NEXT: vpsraw $7, %zmm0, %zmm0
1846 ; AVX512BW-NEXT: retq
1848 ; AVX512F-32-LABEL: test_x86_avx512_psrai_w_512:
1849 ; AVX512F-32: # BB#0:
1850 ; AVX512F-32-NEXT: vpsraw $7, %zmm0, %zmm0
1851 ; AVX512F-32-NEXT: retl
1852 %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
; Merge-masked form: the llvm.x86.avx512.psrai.w.512 (count 7) result is
; combined with %passthru through a select on %mask bitcast to <32 x i1>. The
; assertions expect a {%k1}-masked vpsraw $7 writing %zmm1, followed by a
; vmovdqa64 copy into %zmm0. The 64-bit configuration takes the mask from %edi,
; the 32-bit configuration loads it from the stack.
1855 define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1856 ; AVX512BW-LABEL: test_x86_avx512_mask_psrai_w_512:
1857 ; AVX512BW: ## BB#0:
1858 ; AVX512BW-NEXT: kmovd %edi, %k1
1859 ; AVX512BW-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1}
1860 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
1861 ; AVX512BW-NEXT: retq
1863 ; AVX512F-32-LABEL: test_x86_avx512_mask_psrai_w_512:
1864 ; AVX512F-32: # BB#0:
1865 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1866 ; AVX512F-32-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1}
1867 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
1868 ; AVX512F-32-NEXT: retl
1869 %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1870 %mask.cast = bitcast i32 %mask to <32 x i1>
1871 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1872 ret <32 x i16> %res2
; Zero-masked form: the llvm.x86.avx512.psrai.w.512 (count 7) result is
; selected against zeroinitializer using %mask bitcast to <32 x i1>. The
; assertions expect a single vpsraw $7 with {%k1} {z}. The 64-bit configuration
; takes the mask from %edi, the 32-bit configuration loads it from the stack.
; NOTE(review): %passthru is declared but never used in this body; the other
; maskz immediate-shift tests in this file take only (%a0, %mask). Confirm
; whether the extra parameter is intentional before regenerating assertions.
1874 define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1875 ; AVX512BW-LABEL: test_x86_avx512_maskz_psrai_w_512:
1876 ; AVX512BW: ## BB#0:
1877 ; AVX512BW-NEXT: kmovd %edi, %k1
1878 ; AVX512BW-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z}
1879 ; AVX512BW-NEXT: retq
1881 ; AVX512F-32-LABEL: test_x86_avx512_maskz_psrai_w_512:
1882 ; AVX512F-32: # BB#0:
1883 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1884 ; AVX512F-32-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z}
1885 ; AVX512F-32-NEXT: retl
1886 %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1887 %mask.cast = bitcast i32 %mask to <32 x i1>
1888 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1889 ret <32 x i16> %res2
1891 declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone
; Unmasked llvm.x86.avx512.psrl.w.512 with a <32 x i16> value and an <8 x i16>
; shift-count operand. Both configurations are expected to emit a single
; vpsrlw taking the count from %xmm1.
1894 define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) {
1895 ; AVX512BW-LABEL: test_x86_avx512_psrl_w_512:
1896 ; AVX512BW: ## BB#0:
1897 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
1898 ; AVX512BW-NEXT: retq
1900 ; AVX512F-32-LABEL: test_x86_avx512_psrl_w_512:
1901 ; AVX512F-32: # BB#0:
1902 ; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
1903 ; AVX512F-32-NEXT: retl
1904 %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
; Merge-masked form: the llvm.x86.avx512.psrl.w.512 result is combined with
; %passthru through a select on %mask bitcast to <32 x i1>. The assertions
; expect the select to fold into a {%k1}-masked vpsrlw writing %zmm2, followed
; by a vmovdqa64 copy into %zmm0. The 64-bit configuration takes the mask from
; %edi, the 32-bit configuration loads it from the stack.
1907 define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
1908 ; AVX512BW-LABEL: test_x86_avx512_mask_psrl_w_512:
1909 ; AVX512BW: ## BB#0:
1910 ; AVX512BW-NEXT: kmovd %edi, %k1
1911 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
1912 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
1913 ; AVX512BW-NEXT: retq
1915 ; AVX512F-32-LABEL: test_x86_avx512_mask_psrl_w_512:
1916 ; AVX512F-32: # BB#0:
1917 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1918 ; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
1919 ; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
1920 ; AVX512F-32-NEXT: retl
1921 %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1922 %mask.cast = bitcast i32 %mask to <32 x i1>
1923 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1924 ret <32 x i16> %res2
; Zero-masked form: the llvm.x86.avx512.psrl.w.512 result is selected against
; zeroinitializer using %mask bitcast to <32 x i1>. The assertions expect the
; select to fold into a single vpsrlw with {%k1} {z}. The 64-bit configuration
; takes the mask from %edi, the 32-bit configuration loads it from the stack.
1926 define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
1927 ; AVX512BW-LABEL: test_x86_avx512_maskz_psrl_w_512:
1928 ; AVX512BW: ## BB#0:
1929 ; AVX512BW-NEXT: kmovd %edi, %k1
1930 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z}
1931 ; AVX512BW-NEXT: retq
1933 ; AVX512F-32-LABEL: test_x86_avx512_maskz_psrl_w_512:
1934 ; AVX512F-32: # BB#0:
1935 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1936 ; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z}
1937 ; AVX512F-32-NEXT: retl
1938 %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1939 %mask.cast = bitcast i32 %mask to <32 x i1>
1940 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1941 ret <32 x i16> %res2
1943 declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone
; Unmasked llvm.x86.avx512.psrli.w.512 with the constant count 7. Both
; configurations are expected to emit a single vpsrlw with the immediate $7.
1946 define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) {
1947 ; AVX512BW-LABEL: test_x86_avx512_psrli_w_512:
1948 ; AVX512BW: ## BB#0:
1949 ; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0
1950 ; AVX512BW-NEXT: retq
1952 ; AVX512F-32-LABEL: test_x86_avx512_psrli_w_512:
1953 ; AVX512F-32: # BB#0:
1954 ; AVX512F-32-NEXT: vpsrlw $7, %zmm0, %zmm0
1955 ; AVX512F-32-NEXT: retl
1956 %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
; Merge-masked form: the llvm.x86.avx512.psrli.w.512 (count 7) result is
; combined with %passthru through a select on %mask bitcast to <32 x i1>. The
; assertions expect a {%k1}-masked vpsrlw $7 writing %zmm1, followed by a
; vmovdqa64 copy into %zmm0. The 64-bit configuration takes the mask from %edi,
; the 32-bit configuration loads it from the stack.
1959 define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1960 ; AVX512BW-LABEL: test_x86_avx512_mask_psrli_w_512:
1961 ; AVX512BW: ## BB#0:
1962 ; AVX512BW-NEXT: kmovd %edi, %k1
1963 ; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1}
1964 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
1965 ; AVX512BW-NEXT: retq
1967 ; AVX512F-32-LABEL: test_x86_avx512_mask_psrli_w_512:
1968 ; AVX512F-32: # BB#0:
1969 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1970 ; AVX512F-32-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1}
1971 ; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
1972 ; AVX512F-32-NEXT: retl
1973 %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1974 %mask.cast = bitcast i32 %mask to <32 x i1>
1975 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1976 ret <32 x i16> %res2
; Zero-masked form: the llvm.x86.avx512.psrli.w.512 (count 7) result is
; selected against zeroinitializer using %mask bitcast to <32 x i1>. The
; assertions expect a single vpsrlw $7 with {%k1} {z}. The 64-bit configuration
; takes the mask from %edi, the 32-bit configuration loads it from the stack.
1978 define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) {
1979 ; AVX512BW-LABEL: test_x86_avx512_maskz_psrli_w_512:
1980 ; AVX512BW: ## BB#0:
1981 ; AVX512BW-NEXT: kmovd %edi, %k1
1982 ; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z}
1983 ; AVX512BW-NEXT: retq
1985 ; AVX512F-32-LABEL: test_x86_avx512_maskz_psrli_w_512:
1986 ; AVX512F-32: # BB#0:
1987 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1988 ; AVX512F-32-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z}
1989 ; AVX512F-32-NEXT: retl
1990 %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1991 %mask.cast = bitcast i32 %mask to <32 x i1>
1992 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1993 ret <32 x i16> %res2
1995 declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone