1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,SKX
5 attributes #0 = { nounwind }
; Truncates <16 x i32> to <16 x i8>. Both runs expect a single vpmovdb from zmm0 into xmm0.
7 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
8 ; ALL-LABEL: trunc_16x32_to_16x8:
10 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
11 ; ALL-NEXT: vzeroupper
13 %x = trunc <16 x i32> %i to <16 x i8>
; Truncates <8 x i64> to <8 x i16>. Both runs expect a single vpmovqw from zmm0 into xmm0.
17 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
18 ; ALL-LABEL: trunc_8x64_to_8x16:
20 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
21 ; ALL-NEXT: vzeroupper
23 %x = trunc <8 x i64> %i to <8 x i16>
; Truncates <16 x i32> to <16 x i16>. Both runs expect a single vpmovdw from zmm0 into ymm0.
27 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
28 ; ALL-LABEL: trunc_v16i32_to_v16i16:
30 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
32 %1 = trunc <16 x i32> %x to <16 x i16>
; Truncates <8 x i64> to <8 x i8>. Both runs expect a single vpmovqb from zmm0 into xmm0.
36 define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
37 ; ALL-LABEL: trunc_qb_512:
39 ; ALL-NEXT: vpmovqb %zmm0, %xmm0
40 ; ALL-NEXT: vzeroupper
42 %x = trunc <8 x i64> %i to <8 x i8>
; Truncates <8 x i64> to <8 x i8> and stores the result through %res.
; Both runs expect the truncate and store to be a single vpmovqb to memory.
46 define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
47 ; ALL-LABEL: trunc_qb_512_mem:
49 ; ALL-NEXT: vpmovqb %zmm0, (%rdi)
50 ; ALL-NEXT: vzeroupper
52 %x = trunc <8 x i64> %i to <8 x i8>
53 store <8 x i8> %x, <8 x i8>* %res
; Truncates <4 x i64> to <4 x i8>. The KNL run expects vpmovqb on the zmm0 super-register
; (ymm0 is re-declared as zmm0); the SKX run expects vpmovqb directly on ymm0.
57 define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
58 ; KNL-LABEL: trunc_qb_256:
60 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
61 ; KNL-NEXT: vpmovqb %zmm0, %xmm0
62 ; KNL-NEXT: vzeroupper
65 ; SKX-LABEL: trunc_qb_256:
67 ; SKX-NEXT: vpmovqb %ymm0, %xmm0
68 ; SKX-NEXT: vzeroupper
70 %x = trunc <4 x i64> %i to <4 x i8>
; Truncates <4 x i64> to <4 x i8> and stores the result through %res.
; The KNL run expects vpmovqb on zmm0 followed by a vmovd store; the SKX run expects one vpmovqb from ymm0 to memory.
74 define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
75 ; KNL-LABEL: trunc_qb_256_mem:
77 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
78 ; KNL-NEXT: vpmovqb %zmm0, %xmm0
79 ; KNL-NEXT: vmovd %xmm0, (%rdi)
80 ; KNL-NEXT: vzeroupper
83 ; SKX-LABEL: trunc_qb_256_mem:
85 ; SKX-NEXT: vpmovqb %ymm0, (%rdi)
86 ; SKX-NEXT: vzeroupper
88 %x = trunc <4 x i64> %i to <4 x i8>
89 store <4 x i8> %x, <4 x i8>* %res
; Truncates <2 x i64> to <2 x i8>. Both runs expect a vpshufb that gathers bytes 0 and 8 of xmm0.
93 define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
94 ; ALL-LABEL: trunc_qb_128:
96 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
98 %x = trunc <2 x i64> %i to <2 x i8>
; Truncates <2 x i64> to <2 x i8> and stores the result through %res.
; The KNL run expects vpshufb plus a vpextrw store; the SKX run expects one vpmovqb from xmm0 to memory.
102 define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
103 ; KNL-LABEL: trunc_qb_128_mem:
105 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
106 ; KNL-NEXT: vpextrw $0, %xmm0, (%rdi)
109 ; SKX-LABEL: trunc_qb_128_mem:
111 ; SKX-NEXT: vpmovqb %xmm0, (%rdi)
113 %x = trunc <2 x i64> %i to <2 x i8>
114 store <2 x i8> %x, <2 x i8>* %res
; Truncates <8 x i64> to <8 x i16>. Both runs expect a single vpmovqw from zmm0 into xmm0.
118 define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
119 ; ALL-LABEL: trunc_qw_512:
121 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
122 ; ALL-NEXT: vzeroupper
124 %x = trunc <8 x i64> %i to <8 x i16>
; Truncates <8 x i64> to <8 x i16> and stores the result through %res.
; Both runs expect the truncate and store to be a single vpmovqw to memory.
128 define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
129 ; ALL-LABEL: trunc_qw_512_mem:
131 ; ALL-NEXT: vpmovqw %zmm0, (%rdi)
132 ; ALL-NEXT: vzeroupper
134 %x = trunc <8 x i64> %i to <8 x i16>
135 store <8 x i16> %x, <8 x i16>* %res
; Truncates <4 x i64> to <4 x i16>. The KNL run expects vpmovqw on the zmm0 super-register;
; the SKX run expects vpmovqw directly on ymm0.
139 define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
140 ; KNL-LABEL: trunc_qw_256:
142 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
143 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
144 ; KNL-NEXT: vzeroupper
147 ; SKX-LABEL: trunc_qw_256:
149 ; SKX-NEXT: vpmovqw %ymm0, %xmm0
150 ; SKX-NEXT: vzeroupper
152 %x = trunc <4 x i64> %i to <4 x i16>
; Truncates <4 x i64> to <4 x i16> and stores the result through %res.
; The KNL run expects vpmovqw on zmm0 followed by a vmovq store; the SKX run expects one vpmovqw from ymm0 to memory.
156 define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
157 ; KNL-LABEL: trunc_qw_256_mem:
159 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
160 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
161 ; KNL-NEXT: vmovq %xmm0, (%rdi)
162 ; KNL-NEXT: vzeroupper
165 ; SKX-LABEL: trunc_qw_256_mem:
167 ; SKX-NEXT: vpmovqw %ymm0, (%rdi)
168 ; SKX-NEXT: vzeroupper
170 %x = trunc <4 x i64> %i to <4 x i16>
171 store <4 x i16> %x, <4 x i16>* %res
; Truncates <2 x i64> to <2 x i16>. The KNL run expects a vpshufd + vpshuflw pair;
; the SKX run expects a single vpshufb.
175 define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
176 ; KNL-LABEL: trunc_qw_128:
178 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
179 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
182 ; SKX-LABEL: trunc_qw_128:
184 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
186 %x = trunc <2 x i64> %i to <2 x i16>
; Truncates <2 x i64> to <2 x i16> and stores the result through %res.
; The KNL run expects vpshufd + vpshuflw and a vmovd store; the SKX run expects one vpmovqw from xmm0 to memory.
190 define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
191 ; KNL-LABEL: trunc_qw_128_mem:
193 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
194 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
195 ; KNL-NEXT: vmovd %xmm0, (%rdi)
198 ; SKX-LABEL: trunc_qw_128_mem:
200 ; SKX-NEXT: vpmovqw %xmm0, (%rdi)
202 %x = trunc <2 x i64> %i to <2 x i16>
203 store <2 x i16> %x, <2 x i16>* %res
; Truncates <8 x i64> to <8 x i32>. Both runs expect a single vpmovqd from zmm0 into ymm0.
207 define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
208 ; ALL-LABEL: trunc_qd_512:
210 ; ALL-NEXT: vpmovqd %zmm0, %ymm0
212 %x = trunc <8 x i64> %i to <8 x i32>
; Truncates <8 x i64> to <8 x i32> and stores the result through %res.
; Both runs expect the truncate and store to be a single vpmovqd to memory.
216 define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
217 ; ALL-LABEL: trunc_qd_512_mem:
219 ; ALL-NEXT: vpmovqd %zmm0, (%rdi)
220 ; ALL-NEXT: vzeroupper
222 %x = trunc <8 x i64> %i to <8 x i32>
223 store <8 x i32> %x, <8 x i32>* %res
; Truncates <4 x i64> to <4 x i32>. The KNL run expects vpmovqd from zmm0 into ymm0 (result read as xmm0);
; the SKX run expects vpmovqd from ymm0 into xmm0.
227 define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
228 ; KNL-LABEL: trunc_qd_256:
230 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
231 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
232 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
233 ; KNL-NEXT: vzeroupper
236 ; SKX-LABEL: trunc_qd_256:
238 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
239 ; SKX-NEXT: vzeroupper
241 %x = trunc <4 x i64> %i to <4 x i32>
; Truncates <4 x i64> to <4 x i32> and stores the result through %res.
; The KNL run expects vpmovqd on zmm0 followed by a vmovdqa store; the SKX run expects one vpmovqd from ymm0 to memory.
245 define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
246 ; KNL-LABEL: trunc_qd_256_mem:
248 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
249 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
250 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
251 ; KNL-NEXT: vzeroupper
254 ; SKX-LABEL: trunc_qd_256_mem:
256 ; SKX-NEXT: vpmovqd %ymm0, (%rdi)
257 ; SKX-NEXT: vzeroupper
259 %x = trunc <4 x i64> %i to <4 x i32>
260 store <4 x i32> %x, <4 x i32>* %res
; Truncates <2 x i64> to <2 x i32>. Both runs expect a single vpermilps shuffle of xmm0.
264 define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
265 ; ALL-LABEL: trunc_qd_128:
267 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
269 %x = trunc <2 x i64> %i to <2 x i32>
; Truncates <2 x i64> to <2 x i32> and stores the result through %res.
; The KNL run expects vpermilps plus a vmovlps store; the SKX run expects one vpmovqd from xmm0 to memory.
273 define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
274 ; KNL-LABEL: trunc_qd_128_mem:
276 ; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
277 ; KNL-NEXT: vmovlps %xmm0, (%rdi)
280 ; SKX-LABEL: trunc_qd_128_mem:
282 ; SKX-NEXT: vpmovqd %xmm0, (%rdi)
284 %x = trunc <2 x i64> %i to <2 x i32>
285 store <2 x i32> %x, <2 x i32>* %res
; Truncates <16 x i32> to <16 x i8>. Both runs expect a single vpmovdb from zmm0 into xmm0.
289 define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
290 ; ALL-LABEL: trunc_db_512:
292 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
293 ; ALL-NEXT: vzeroupper
295 %x = trunc <16 x i32> %i to <16 x i8>
; Truncates <16 x i32> to <16 x i8> and stores the result through %res.
; Both runs expect the truncate and store to be a single vpmovdb to memory.
299 define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
300 ; ALL-LABEL: trunc_db_512_mem:
302 ; ALL-NEXT: vpmovdb %zmm0, (%rdi)
303 ; ALL-NEXT: vzeroupper
305 %x = trunc <16 x i32> %i to <16 x i8>
306 store <16 x i8> %x, <16 x i8>* %res
; Truncates <8 x i32> to <8 x i8>. The KNL run expects vpmovdb on the zmm0 super-register;
; the SKX run expects vpmovdb directly on ymm0.
310 define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
311 ; KNL-LABEL: trunc_db_256:
313 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
314 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
315 ; KNL-NEXT: vzeroupper
318 ; SKX-LABEL: trunc_db_256:
320 ; SKX-NEXT: vpmovdb %ymm0, %xmm0
321 ; SKX-NEXT: vzeroupper
323 %x = trunc <8 x i32> %i to <8 x i8>
; Truncates <8 x i32> to <8 x i8> and stores the result through %res.
; The KNL run expects vpmovdb on zmm0 followed by a vmovq store; the SKX run expects one vpmovdb from ymm0 to memory.
327 define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
328 ; KNL-LABEL: trunc_db_256_mem:
330 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
331 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
332 ; KNL-NEXT: vmovq %xmm0, (%rdi)
333 ; KNL-NEXT: vzeroupper
336 ; SKX-LABEL: trunc_db_256_mem:
338 ; SKX-NEXT: vpmovdb %ymm0, (%rdi)
339 ; SKX-NEXT: vzeroupper
341 %x = trunc <8 x i32> %i to <8 x i8>
342 store <8 x i8> %x, <8 x i8>* %res
; Truncates <4 x i32> to <4 x i8>. Both runs expect a vpshufb that gathers bytes 0, 4, 8 and 12 of xmm0.
346 define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
347 ; ALL-LABEL: trunc_db_128:
349 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
351 %x = trunc <4 x i32> %i to <4 x i8>
; Truncates <4 x i32> to <4 x i8> and stores the result through %res.
; The KNL run expects vpshufb plus a vmovd store; the SKX run expects one vpmovdb from xmm0 to memory.
355 define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
356 ; KNL-LABEL: trunc_db_128_mem:
358 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
359 ; KNL-NEXT: vmovd %xmm0, (%rdi)
362 ; SKX-LABEL: trunc_db_128_mem:
364 ; SKX-NEXT: vpmovdb %xmm0, (%rdi)
366 %x = trunc <4 x i32> %i to <4 x i8>
367 store <4 x i8> %x, <4 x i8>* %res
; Truncates <16 x i32> to <16 x i16>. Both runs expect a single vpmovdw from zmm0 into ymm0.
371 define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
372 ; ALL-LABEL: trunc_dw_512:
374 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
376 %x = trunc <16 x i32> %i to <16 x i16>
; Truncates <16 x i32> to <16 x i16> and stores the result through %res.
; Both runs expect the truncate and store to be a single vpmovdw to memory.
380 define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
381 ; ALL-LABEL: trunc_dw_512_mem:
383 ; ALL-NEXT: vpmovdw %zmm0, (%rdi)
384 ; ALL-NEXT: vzeroupper
386 %x = trunc <16 x i32> %i to <16 x i16>
387 store <16 x i16> %x, <16 x i16>* %res
; Truncates <8 x i32> to <8 x i16>. The KNL run expects vpmovdw from zmm0 into ymm0 (result read as xmm0);
; the SKX run expects vpmovdw from ymm0 into xmm0.
391 define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
392 ; KNL-LABEL: trunc_dw_256:
394 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
395 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
396 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
397 ; KNL-NEXT: vzeroupper
400 ; SKX-LABEL: trunc_dw_256:
402 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
403 ; SKX-NEXT: vzeroupper
405 %x = trunc <8 x i32> %i to <8 x i16>
; Truncates <8 x i32> to <8 x i16> and stores the result through %res.
; The KNL run expects vpmovdw on zmm0 followed by a vmovdqa store; the SKX run expects one vpmovdw from ymm0 to memory.
409 define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
410 ; KNL-LABEL: trunc_dw_256_mem:
412 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
413 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
414 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
415 ; KNL-NEXT: vzeroupper
418 ; SKX-LABEL: trunc_dw_256_mem:
420 ; SKX-NEXT: vpmovdw %ymm0, (%rdi)
421 ; SKX-NEXT: vzeroupper
423 %x = trunc <8 x i32> %i to <8 x i16>
424 store <8 x i16> %x, <8 x i16>* %res
; Truncates <4 x i32> to <4 x i16> and stores the result through %res.
; The KNL run expects vpshufb plus a vmovq store; the SKX run expects one vpmovdw from xmm0 to memory.
428 define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
429 ; KNL-LABEL: trunc_dw_128_mem:
431 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
432 ; KNL-NEXT: vmovq %xmm0, (%rdi)
435 ; SKX-LABEL: trunc_dw_128_mem:
437 ; SKX-NEXT: vpmovdw %xmm0, (%rdi)
439 %x = trunc <4 x i32> %i to <4 x i16>
440 store <4 x i16> %x, <4 x i16>* %res
; Truncates <32 x i16> to <32 x i8>. The KNL run expects each ymm half to be zero-extended to dwords
; (vpmovzxwd), narrowed with vpmovdb, and recombined with vinserti128; the SKX run expects one vpmovwb.
444 define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
445 ; KNL-LABEL: trunc_wb_512:
447 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
448 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
449 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
450 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
451 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
454 ; SKX-LABEL: trunc_wb_512:
456 ; SKX-NEXT: vpmovwb %zmm0, %ymm0
458 %x = trunc <32 x i16> %i to <32 x i8>
; Truncates <32 x i16> to <32 x i8> and stores the result through %res.
; The KNL run expects the per-half vpmovzxwd/vpmovdb sequence, vinserti128 and a vmovdqa store;
; the SKX run expects one vpmovwb from zmm0 to memory.
462 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
463 ; KNL-LABEL: trunc_wb_512_mem:
465 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
466 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
467 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
468 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
469 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
470 ; KNL-NEXT: vmovdqa %ymm0, (%rdi)
471 ; KNL-NEXT: vzeroupper
474 ; SKX-LABEL: trunc_wb_512_mem:
476 ; SKX-NEXT: vpmovwb %zmm0, (%rdi)
477 ; SKX-NEXT: vzeroupper
479 %x = trunc <32 x i16> %i to <32 x i8>
480 store <32 x i8> %x, <32 x i8>* %res
; Truncates <16 x i16> to <16 x i8>. The KNL run expects vpmovzxwd to dwords followed by vpmovdb;
; the SKX run expects one vpmovwb from ymm0 into xmm0.
484 define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
485 ; KNL-LABEL: trunc_wb_256:
487 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
488 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
489 ; KNL-NEXT: vzeroupper
492 ; SKX-LABEL: trunc_wb_256:
494 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
495 ; SKX-NEXT: vzeroupper
497 %x = trunc <16 x i16> %i to <16 x i8>
; Truncates <16 x i16> to <16 x i8> and stores the result through %res.
; The KNL run expects vpmovzxwd followed by a vpmovdb store; the SKX run expects one vpmovwb from ymm0 to memory.
501 define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
502 ; KNL-LABEL: trunc_wb_256_mem:
504 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
505 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
506 ; KNL-NEXT: vzeroupper
509 ; SKX-LABEL: trunc_wb_256_mem:
511 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
512 ; SKX-NEXT: vzeroupper
514 %x = trunc <16 x i16> %i to <16 x i8>
515 store <16 x i8> %x, <16 x i8>* %res
; Truncates <8 x i16> to <8 x i8>. Both runs expect a vpshufb that gathers the even bytes of xmm0.
519 define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
520 ; ALL-LABEL: trunc_wb_128:
522 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
524 %x = trunc <8 x i16> %i to <8 x i8>
; Truncates <8 x i16> to <8 x i8> and stores the result through %res.
; The KNL run expects vpshufb plus a vmovq store; the SKX run expects one vpmovwb from xmm0 to memory.
528 define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
529 ; KNL-LABEL: trunc_wb_128_mem:
531 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
532 ; KNL-NEXT: vmovq %xmm0, (%rdi)
535 ; SKX-LABEL: trunc_wb_128_mem:
537 ; SKX-NEXT: vpmovwb %xmm0, (%rdi)
539 %x = trunc <8 x i16> %i to <8 x i8>
540 store <8 x i8> %x, <8 x i8>* %res
; Unsigned-saturating truncate: clamps each i16 lane to at most 255 (icmp ult + select), truncates to i8
; and stores through %res. The KNL run expects vpminuw, vpmovzxwd and a vpmovdb store;
; the SKX run expects one vpmovuswb to memory.
545 define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
546 ; KNL-LABEL: usat_trunc_wb_256_mem:
548 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
549 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
550 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
551 ; KNL-NEXT: vzeroupper
554 ; SKX-LABEL: usat_trunc_wb_256_mem:
556 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
557 ; SKX-NEXT: vzeroupper
559 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
560 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
561 %x6 = trunc <16 x i16> %x5 to <16 x i8>
562 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating truncate of <16 x i16> to <16 x i8>: lanes are clamped to at most 255 before the trunc.
; The KNL run expects vpminuw, vpmovzxwd and vpmovdb; the SKX run expects one vpmovuswb.
566 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
567 ; KNL-LABEL: usat_trunc_wb_256:
569 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
570 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
571 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
572 ; KNL-NEXT: vzeroupper
575 ; SKX-LABEL: usat_trunc_wb_256:
577 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
578 ; SKX-NEXT: vzeroupper
580 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
581 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
582 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Unsigned-saturating truncate of <8 x i16> to <8 x i8> (clamp to 255, then trunc), stored through %res.
; The KNL run expects vpminuw, vpackuswb and a vmovq store; the SKX run expects one vpmovuswb to memory.
586 define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
587 ; KNL-LABEL: usat_trunc_wb_128_mem:
589 ; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
590 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
591 ; KNL-NEXT: vmovq %xmm0, (%rdi)
594 ; SKX-LABEL: usat_trunc_wb_128_mem:
596 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
598 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
599 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
600 %x6 = trunc <8 x i16> %x5 to <8 x i8>
601 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating truncate of <16 x i32> to <16 x i8> (clamp to 255, then trunc), stored through %res.
; Both runs expect the whole pattern to become a single vpmovusdb to memory.
605 define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
606 ; ALL-LABEL: usat_trunc_db_512_mem:
608 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
609 ; ALL-NEXT: vzeroupper
611 %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
612 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
613 %x6 = trunc <16 x i32> %x5 to <16 x i8>
614 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating truncate of <8 x i64> to <8 x i8> (clamp to 255, then trunc), stored through %res.
; Both runs expect the whole pattern to become a single vpmovusqb to memory.
618 define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
619 ; ALL-LABEL: usat_trunc_qb_512_mem:
621 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
622 ; ALL-NEXT: vzeroupper
624 %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
625 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
626 %x6 = trunc <8 x i64> %x5 to <8 x i8>
627 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating truncate of <8 x i64> to <8 x i32> (clamp to 4294967295, then trunc), stored through %res.
; Both runs expect the whole pattern to become a single vpmovusqd to memory.
631 define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
632 ; ALL-LABEL: usat_trunc_qd_512_mem:
634 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
635 ; ALL-NEXT: vzeroupper
637 %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
638 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
639 %x6 = trunc <8 x i64> %x5 to <8 x i32>
640 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Unsigned-saturating truncate of <8 x i64> to <8 x i16> (clamp to 65535, then trunc), stored through %res.
; Both runs expect the whole pattern to become a single vpmovusqw to memory.
644 define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
645 ; ALL-LABEL: usat_trunc_qw_512_mem:
647 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
648 ; ALL-NEXT: vzeroupper
650 %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
651 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
652 %x6 = trunc <8 x i64> %x5 to <8 x i16>
653 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Unsigned-saturating truncate of <32 x i32> to <32 x i8> (clamp to 255, then trunc).
; Both runs expect one vpmovusdb per zmm half, joined with vinserti128.
657 define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
658 ; ALL-LABEL: usat_trunc_db_1024:
660 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
661 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
662 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
664 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
665 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
666 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Unsigned-saturating truncate of <32 x i32> to <32 x i8> (clamp to 255, then trunc), stored through %p with align 1.
; Both runs expect one vpmovusdb per zmm half, vinserti128, and an unaligned vmovdqu store.
670 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
671 ; ALL-LABEL: usat_trunc_db_1024_mem:
673 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
674 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
675 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
676 ; ALL-NEXT: vmovdqu %ymm0, (%rdi)
677 ; ALL-NEXT: vzeroupper
679 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
680 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
681 %x6 = trunc <32 x i32> %x5 to <32 x i8>
682 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Unsigned-saturating truncate of <16 x i32> to <16 x i16> (clamp to 65535, then trunc).
; Both runs expect a single vpmovusdw from zmm0 into ymm0.
686 define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
687 ; ALL-LABEL: usat_trunc_dw_512:
689 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
691 %x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
692 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
693 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Unsigned-saturating truncate of <8 x i16> to <8 x i8> (clamp to 255, then trunc).
; Both runs expect vpminuw followed by vpackuswb.
697 define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
698 ; ALL-LABEL: usat_trunc_wb_128:
700 ; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
701 ; ALL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
703 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
704 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
705 %x6 = trunc <8 x i16> %x5 to <8 x i8>
; Unsigned-saturating truncate of <16 x i64> to <16 x i16> (clamp to 65535, then trunc).
; Both runs expect one vpmovusqw per zmm half, joined with vinserti128.
709 define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
710 ; ALL-LABEL: usat_trunc_qw_1024:
712 ; ALL-NEXT: vpmovusqw %zmm0, %xmm0
713 ; ALL-NEXT: vpmovusqw %zmm1, %xmm1
714 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
716 %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
717 %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
718 %x6 = trunc <16 x i64> %x5 to <16 x i16>
; Clamps <8 x i32> lanes to at most 255, truncates to <8 x i8>, then widens to <16 x i8> with a shufflevector
; whose indices 8-15 select from the undef second operand. The KNL run expects a broadcast of 255, vpminud,
; and vpmovdb on zmm0; the SKX run expects vpminud with an embedded {1to8} broadcast and vpmovdb on ymm0.
722 define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
723 ; KNL-LABEL: usat_trunc_db_256:
725 ; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
726 ; KNL-NEXT: vpminud %ymm1, %ymm0, %ymm0
727 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
728 ; KNL-NEXT: vzeroupper
731 ; SKX-LABEL: usat_trunc_db_256:
733 ; SKX-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0
734 ; SKX-NEXT: vpmovdb %ymm0, %xmm0
735 ; SKX-NEXT: vzeroupper
737 %tmp1 = icmp ult <8 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
738 %tmp2 = select <8 x i1> %tmp1, <8 x i32> %x, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
739 %tmp3 = trunc <8 x i32> %tmp2 to <8 x i8>
740 %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
746 ; Tests for the following unsigned saturation pattern:
748 ; %a = icmp sgt %x, C1
749 ; %b = select %a, %x, C1
750 ; %c = icmp slt %b, C2
751 ; %d = select %c, %b, C2
; Signed clamp of <16 x i16> to [0, 255] written as smin(smax(x, 0), 255), truncated to i8 and stored through %res.
; The KNL run expects vpmaxsw, vpminsw, vpmovzxwd and a vpmovdb store;
; the SKX run expects vpmaxsw against zero followed by one vpmovuswb to memory.
755 define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, <16 x i8>* %res) {
756 ; KNL-LABEL: smax_usat_trunc_wb_256_mem1:
758 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
759 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
760 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
761 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
762 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
763 ; KNL-NEXT: vzeroupper
766 ; SKX-LABEL: smax_usat_trunc_wb_256_mem1:
768 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
769 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
770 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
771 ; SKX-NEXT: vzeroupper
773 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
774 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
775 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
776 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
777 %x6 = trunc <16 x i16> %x5 to <16 x i8>
778 store <16 x i8> %x6, <16 x i8>* %res, align 1
782 ; Test for smax(smin(x, C2), C1).
; Same [0, 255] clamp with the operations in the opposite order: smin against 255 first, then smax against 0.
; The KNL run expects vpminsw before vpmaxsw, then vpmovzxwd and a vpmovdb store;
; the SKX run expects only vpmaxsw against zero followed by vpmovuswb to memory.
783 define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, <16 x i8>* %res) {
784 ; KNL-LABEL: smax_usat_trunc_wb_256_mem2:
786 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
787 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
788 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
789 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
790 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
791 ; KNL-NEXT: vzeroupper
794 ; SKX-LABEL: smax_usat_trunc_wb_256_mem2:
796 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
797 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
798 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
799 ; SKX-NEXT: vzeroupper
801 %x1 = icmp slt <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
802 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
803 %x3 = icmp sgt <16 x i16> %x2, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
804 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
805 %x6 = trunc <16 x i16> %x5 to <16 x i8>
806 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Signed clamp of <16 x i16> to [0, 255] (smax against 0, then smin against 255), truncated to <16 x i8>.
; The KNL run expects vpmaxsw, vpminsw, vpmovzxwd and vpmovdb;
; the SKX run expects vpmaxsw against zero followed by one vpmovuswb.
810 define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
811 ; KNL-LABEL: smax_usat_trunc_wb_256:
813 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
814 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
815 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
816 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
817 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
818 ; KNL-NEXT: vzeroupper
821 ; SKX-LABEL: smax_usat_trunc_wb_256:
823 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
824 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
825 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
826 ; SKX-NEXT: vzeroupper
828 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
829 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
830 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
831 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
832 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Signed clamp of <8 x i16> to [0, 255] (smax against 0, then smin against 255), truncated to i8 and stored through %res.
; The KNL run expects vpmaxsw, vpminsw, vpackuswb and a vmovq store;
; the SKX run expects vpmaxsw against zero followed by one vpmovuswb to memory.
836 define void @smax_usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
837 ; KNL-LABEL: smax_usat_trunc_wb_128_mem:
839 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
840 ; KNL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
841 ; KNL-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
842 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
843 ; KNL-NEXT: vmovq %xmm0, (%rdi)
846 ; SKX-LABEL: smax_usat_trunc_wb_128_mem:
848 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
849 ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
850 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
852 %x1 = icmp sgt <8 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
853 %x2 = select <8 x i1> %x1, <8 x i16> %i, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
854 %x3 = icmp slt <8 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
855 %x5 = select <8 x i1> %x3, <8 x i16> %x2, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
856 %x6 = trunc <8 x i16> %x5 to <8 x i8>
857 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Signed clamp of <16 x i32> to [0, 255] (smax against 0, then smin against 255), truncated to i8 and stored through %res.
; Both runs expect vpmaxsd against zero followed by one vpmovusdb to memory.
861 define void @smax_usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
862 ; ALL-LABEL: smax_usat_trunc_db_512_mem:
864 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
865 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
866 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
867 ; ALL-NEXT: vzeroupper
869 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
870 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
871 %x3 = icmp slt <16 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
872 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
873 %x6 = trunc <16 x i32> %x5 to <16 x i8>
874 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Signed clamp of <8 x i64> to [0, 255] (smax against 0, then smin against 255), truncated to i8 and stored through %res.
; Both runs expect vpmaxsq against zero followed by one vpmovusqb to memory.
878 define void @smax_usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
879 ; ALL-LABEL: smax_usat_trunc_qb_512_mem:
881 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
882 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
883 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
884 ; ALL-NEXT: vzeroupper
886 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
887 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
888 %x3 = icmp slt <8 x i64> %x2, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
889 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
890 %x6 = trunc <8 x i64> %x5 to <8 x i8>
891 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Signed clamp of <8 x i64> to [0, 4294967295] (smax against 0, then smin), truncated to i32 and stored through %res.
; Both runs expect vpmaxsq against zero followed by one vpmovusqd to memory.
895 define void @smax_usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
896 ; ALL-LABEL: smax_usat_trunc_qd_512_mem:
898 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
899 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
900 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
901 ; ALL-NEXT: vzeroupper
903 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
904 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
905 %x3 = icmp slt <8 x i64> %x2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
906 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
907 %x6 = trunc <8 x i64> %x5 to <8 x i32>
908 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Unsigned-saturating truncation of <8 x i64> to <8 x i16>, stored to %res.
; The IR clamps every lane to [0, 65535] (signed max against 0, then signed
; min against 65535), truncates to i16 and stores with align 1.
; Both RUN configurations expect a zeroed register, vpmaxsq, and a single
; saturating vpmovusqw store to (%rdi).
912 define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
913 ; ALL-LABEL: smax_usat_trunc_qw_512_mem:
915 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
916 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
917 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
918 ; ALL-NEXT: vzeroupper
; Lower clamp: signed max(%i, 0) written as icmp sgt + select.
920 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
921 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
; Upper clamp: signed min(%x2, 65535) written as icmp slt + select.
922 %x3 = icmp slt <8 x i64> %x2, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
923 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
; Narrow the clamped lanes to i16 and store them.
924 %x6 = trunc <8 x i64> %x5 to <8 x i16>
925 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Unsigned-saturating truncation of <32 x i32> to <32 x i8>; the function is
; declared to return the <32 x i8> value.
; The IR clamps every lane to [0, 255] (signed max against 0, then signed min
; against 255) and truncates to i8.
; The expected output handles the input as two 512-bit halves (zmm0, zmm1):
; each half gets a vpmaxsd against a shared zero register and a saturating
; vpmovusdb into an xmm register, and vinserti128 joins the two into ymm0.
929 define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
930 ; ALL-LABEL: smax_usat_trunc_db_1024:
932 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
933 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
934 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
935 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
936 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
937 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; Lower clamp: signed max(%i, 0) written as icmp sgt + select.
939 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
940 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Upper clamp: signed min(%x2, 255) written as icmp slt + select.
941 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
942 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Narrow the clamped lanes to i8.
943 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Store variant of the <32 x i32> to <32 x i8> unsigned-saturating
; truncation: the IR clamps every lane to [0, 255] (signed max against 0,
; then signed min against 255), truncates to i8 and stores to %p with align 1.
; The expected output clamps and narrows each 512-bit half (zmm0, zmm1) with
; vpmaxsd + vpmovusdb into xmm registers, joins them with vinserti128, and
; writes the result with one unaligned vmovdqu of ymm0 to (%rdi); no
; truncating-store instruction is expected for this width.
947 define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
948 ; ALL-LABEL: smax_usat_trunc_db_1024_mem:
950 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
951 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
952 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
953 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
954 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
955 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
956 ; ALL-NEXT: vmovdqu %ymm0, (%rdi)
957 ; ALL-NEXT: vzeroupper
; Lower clamp: signed max(%i, 0) written as icmp sgt + select.
959 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
960 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Upper clamp: signed min(%x2, 255) written as icmp slt + select.
961 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
962 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Narrow the clamped lanes to i8 and store them.
963 %x6 = trunc <32 x i32> %x5 to <32 x i8>
964 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Unsigned-saturating truncation of <16 x i32> to <16 x i16>; the function is
; declared to return the <16 x i16> value.
; The IR clamps every lane to [0, 65535] (signed max against 0, then signed
; min against 65535) and truncates to i16.
; Both RUN configurations expect a zeroed register, vpmaxsd, and a
; saturating vpmovusdw from zmm0 into ymm0.
968 define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) {
969 ; ALL-LABEL: smax_usat_trunc_dw_512:
971 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
972 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
973 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
; Lower clamp: signed max(%i, 0) written as icmp sgt + select.
975 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
976 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Upper clamp: signed min(%x2, 65535) written as icmp slt + select.
977 %x3 = icmp slt <16 x i32> %x2, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
978 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
; Narrow the clamped lanes to i16.
979 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Negative test: same icmp/select/trunc/store shape as the smax_usat tests,
; but on <16 x i16> to <16 x i8> with an upper bound of i16 -1 instead of 255.
; The expected output contains no saturating vpmovus* instruction: the
; bounds are applied explicitly with vpmaxsw (against zero) and vpminsw
; (against an all-ones register built by vpcmpeqd), followed by a plain
; truncating store.
; NOTE(review): presumably the fold is rejected because -1 is not the
; unsigned maximum of the i8 destination type -- confirm against the
; x86 lowering code.
; The KNL run (-mattr=+avx512f only) widens with vpmovzxwd to zmm0 and stores
; with vpmovdb; the SKX run stores directly with vpmovwb from ymm0.
983 define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
984 ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
986 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
987 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
988 ; KNL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
989 ; KNL-NEXT: vpminsw %ymm1, %ymm0, %ymm0
990 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
991 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
992 ; KNL-NEXT: vzeroupper
995 ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
997 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
998 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
999 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
1000 ; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1001 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1002 ; SKX-NEXT: vzeroupper
; Lower clamp: signed max(%i, 0) written as icmp sgt + select.
1004 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
1005 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
; Upper bound is i16 -1 here (signed compare), not an unsigned-max constant.
1006 %x3 = icmp slt <16 x i16> %x2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1007 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; Narrow to i8 and store.
1008 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1009 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Negative test: same icmp/select/trunc/store shape as the smax_usat tests,
; but on <16 x i16> to <16 x i8> with bounds of -10 (lower) and -5 (upper)
; instead of 0 and 255.
; The expected output contains no saturating vpmovus* instruction: both
; bounds are applied with vpmaxsw / vpminsw using RIP-relative memory
; operands, followed by a plain truncating store.
; The KNL run (-mattr=+avx512f only) widens with vpmovzxwd to zmm0 and stores
; with vpmovdb; the SKX run stores directly with vpmovwb from ymm0.
1013 define void @negative_test2_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
1014 ; KNL-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1016 ; KNL-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1017 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1018 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1019 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
1020 ; KNL-NEXT: vzeroupper
1023 ; SKX-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1025 ; SKX-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1026 ; SKX-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1027 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1028 ; SKX-NEXT: vzeroupper
; Lower clamp: signed max(%i, -10) written as icmp sgt + select.
1030 %x1 = icmp sgt <16 x i16> %i, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1031 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
; Upper clamp: signed min(%x2, -5) written as icmp slt + select.
1032 %x3 = icmp slt <16 x i16> %x2, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1033 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
; Narrow to i8 and store.
1034 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1035 store <16 x i8> %x6, <16 x i8>* %res, align 1