; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512F-32

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c
; Unmasked register-register form: signed saturating add of two <32 x i16>
; values; both runs expect a single vpaddsw.
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %res = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %res
}
; Signed saturating add intrinsic on <32 x i16>.
declare <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16>, <32 x i16>)
; Merge-masked register-register form: the i32 %mask is reinterpreted as
; <32 x i1>; lanes with a clear bit take %passThru instead of the
; saturating-add result.
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}
; Zero-masked register-register form: lanes whose %mask bit is clear are
; zeroed rather than taking the saturating-add result.
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}
; Unmasked register-memory form: the second operand is loaded from %ptr_b;
; the checks expect the load to appear as the memory operand of vpaddsw.
define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %1
}
; Merge-masked register-memory form: second operand loaded from %ptr_b,
; lanes with a clear %mask bit take %passThru.
define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}
; Zero-masked register-memory form: second operand loaded from %ptr_b,
; lanes with a clear %mask bit are zeroed.
define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}
; Unmasked register-register form: signed saturating subtract of two
; <32 x i16> values; both runs expect a single vpsubsw.
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rr_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %sub
}
; Signed saturating subtract intrinsic on <32 x i16>.
declare <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16>, <32 x i16>)
; Merge-masked register-register form: lanes with a clear %mask bit take
; %passThru instead of the saturating-subtract result.
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
  ret <32 x i16> %res
}
; Zero-masked register-register form: lanes with a clear %mask bit are
; zeroed rather than taking the saturating-subtract result.
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}
; Unmasked register-memory form: the subtrahend is loaded from %ptr_b; the
; checks expect the load to appear as the memory operand of vpsubsw.
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %sub
}
; Merge-masked register-memory form: subtrahend loaded from %ptr_b, lanes
; with a clear %mask bit take %passThru.
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
  ret <32 x i16> %res
}
; Zero-masked register-memory form: subtrahend loaded from %ptr_b, lanes
; with a clear %mask bit are zeroed.
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}
; Signed saturating add on <64 x i16>, wider than one zmm register: the
; checks expect two vpaddsw instructions. In the i386 run the second one
; takes a memory operand at 8(%ebp) after a frame realigned to 64 bytes.
define <64 x i16> @test_mask_adds_epi16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rr_1024:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddsw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddsw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rr_1024:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-64, %esp
; AVX512F-32-NEXT: subl $64, %esp
; AVX512F-32-NEXT: vpaddsw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddsw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
  %1 = call <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16> %a, <64 x i16> %b)
  ret <64 x i16> %1
}
; Signed saturating add intrinsic on <64 x i16>.
declare <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16>, <64 x i16>)
; Signed saturating subtract on <64 x i16>, wider than one zmm register:
; the checks expect two vpsubsw instructions. In the i386 run the second
; one takes a memory operand at 8(%ebp) after a frame realigned to 64 bytes.
define <64 x i16> @test_mask_subs_epi16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rr_1024:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubsw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubsw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rr_1024:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-64, %esp
; AVX512F-32-NEXT: subl $64, %esp
; AVX512F-32-NEXT: vpsubsw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpsubsw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
  %sub = call <64 x i16> @llvm.ssub.sat.v64i16(<64 x i16> %a, <64 x i16> %b)
  ret <64 x i16> %sub
}
; Signed saturating subtract intrinsic on <64 x i16>.
declare <64 x i16> @llvm.ssub.sat.v64i16(<64 x i16>, <64 x i16>)
;
; Unsigned Saturation
;
; Unmasked register-register form: unsigned saturating add of two
; <32 x i16> values; both runs expect a single vpaddusw.
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rr_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %res = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %res
}
; Unsigned saturating add intrinsic on <32 x i16>.
declare <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16>, <32 x i16>)
; Merge-masked register-register form: lanes with a clear %mask bit take
; %passThru instead of the unsigned saturating-add result.
define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}
; Zero-masked register-register form: lanes with a clear %mask bit are
; zeroed rather than taking the unsigned saturating-add result.
define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}
; Unmasked register-memory form: the second operand is loaded from %ptr_b;
; the checks expect the load to appear as the memory operand of vpaddusw.
define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, ptr %ptr_b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %1
}
; Merge-masked register-memory form: second operand loaded from %ptr_b,
; lanes with a clear %mask bit take %passThru.
define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}
; Zero-masked register-memory form: second operand loaded from %ptr_b,
; lanes with a clear %mask bit are zeroed.
define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}
; Unmasked register-register form: unsigned saturating subtract of two
; <32 x i16> values; both runs expect a single vpsubusw.
define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rr_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %sub
}
; Unsigned saturating subtract intrinsic on <32 x i16>.
declare <32 x i16> @llvm.usub.sat.v32i16(<32 x i16>, <32 x i16>)
; Merge-masked register-register form: lanes with a clear %mask bit take
; %passThru instead of the unsigned saturating-subtract result.
define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
  ret <32 x i16> %res
}
; Zero-masked register-register form: lanes with a clear %mask bit are
; zeroed rather than taking the unsigned saturating-subtract result.
define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}
; Unmasked register-memory form: the subtrahend is loaded from %ptr_b; the
; checks expect the load to appear as the memory operand of vpsubusw.
define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, ptr %ptr_b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %sub
}
; Merge-masked register-memory form: subtrahend loaded from %ptr_b, lanes
; with a clear %mask bit take %passThru.
define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
  ret <32 x i16> %res
}
; Zero-masked register-memory form: subtrahend loaded from %ptr_b, lanes
; with a clear %mask bit are zeroed.
define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %b = load <32 x i16>, ptr %ptr_b
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}
; Unsigned saturating add on <64 x i16>, wider than one zmm register: the
; checks expect two vpaddusw instructions. In the i386 run the second one
; takes a memory operand at 8(%ebp) after a frame realigned to 64 bytes.
define <64 x i16> @test_mask_adds_epu16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rr_1024:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpaddusw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddusw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rr_1024:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-64, %esp
; AVX512F-32-NEXT: subl $64, %esp
; AVX512F-32-NEXT: vpaddusw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddusw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
  %1 = call <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16> %a, <64 x i16> %b)
  ret <64 x i16> %1
}
; Unsigned saturating add intrinsic on <64 x i16>.
declare <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16>, <64 x i16>)
; Unsigned saturating subtract on <64 x i16>, wider than one zmm register:
; the checks expect two vpsubusw instructions. In the i386 run the second
; one takes a memory operand at 8(%ebp) after a frame realigned to 64 bytes.
define <64 x i16> @test_mask_subs_epu16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rr_1024:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubusw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubusw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rr_1024:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-64, %esp
; AVX512F-32-NEXT: subl $64, %esp
; AVX512F-32-NEXT: vpsubusw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpsubusw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
  %sub = call <64 x i16> @llvm.usub.sat.v64i16(<64 x i16> %a, <64 x i16> %b)
  ret <64 x i16> %sub
}
; Unsigned saturating subtract intrinsic on <64 x i16>.
declare <64 x i16> @llvm.usub.sat.v64i16(<64 x i16>, <64 x i16>)