1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,SKX
5 attributes #0 = { nounwind }
7 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
8 ; ALL-LABEL: trunc_16x32_to_16x8:
10 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
11 ; ALL-NEXT: vzeroupper
13 %x = trunc <16 x i32> %i to <16 x i8>
17 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
18 ; ALL-LABEL: trunc_8x64_to_8x16:
20 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
21 ; ALL-NEXT: vzeroupper
23 %x = trunc <8 x i64> %i to <8 x i16>
27 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
28 ; ALL-LABEL: trunc_v16i32_to_v16i16:
30 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
32 %1 = trunc <16 x i32> %x to <16 x i16>
36 define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
37 ; ALL-LABEL: trunc_qb_512:
39 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
40 ; ALL-NEXT: vzeroupper
42 %x = trunc <8 x i64> %i to <8 x i8>
46 define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
47 ; ALL-LABEL: trunc_qb_512_mem:
49 ; ALL-NEXT: vpmovqb %zmm0, (%rdi)
50 ; ALL-NEXT: vzeroupper
52 %x = trunc <8 x i64> %i to <8 x i8>
53 store <8 x i8> %x, <8 x i8>* %res
57 define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
58 ; KNL-LABEL: trunc_qb_256:
60 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
61 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
62 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
63 ; KNL-NEXT: vzeroupper
66 ; SKX-LABEL: trunc_qb_256:
68 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
69 ; SKX-NEXT: vzeroupper
71 %x = trunc <4 x i64> %i to <4 x i8>
75 define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
76 ; KNL-LABEL: trunc_qb_256_mem:
78 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
79 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
80 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
81 ; KNL-NEXT: vmovd %xmm0, (%rdi)
82 ; KNL-NEXT: vzeroupper
85 ; SKX-LABEL: trunc_qb_256_mem:
87 ; SKX-NEXT: vpmovqb %ymm0, (%rdi)
88 ; SKX-NEXT: vzeroupper
90 %x = trunc <4 x i64> %i to <4 x i8>
91 store <4 x i8> %x, <4 x i8>* %res
95 define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
96 ; ALL-LABEL: trunc_qb_128:
99 %x = trunc <2 x i64> %i to <2 x i8>
103 define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
104 ; KNL-LABEL: trunc_qb_128_mem:
106 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
107 ; KNL-NEXT: vpextrw $0, %xmm0, (%rdi)
110 ; SKX-LABEL: trunc_qb_128_mem:
112 ; SKX-NEXT: vpmovqb %xmm0, (%rdi)
114 %x = trunc <2 x i64> %i to <2 x i8>
115 store <2 x i8> %x, <2 x i8>* %res
119 define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
120 ; ALL-LABEL: trunc_qw_512:
122 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
123 ; ALL-NEXT: vzeroupper
125 %x = trunc <8 x i64> %i to <8 x i16>
129 define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
130 ; ALL-LABEL: trunc_qw_512_mem:
132 ; ALL-NEXT: vpmovqw %zmm0, (%rdi)
133 ; ALL-NEXT: vzeroupper
135 %x = trunc <8 x i64> %i to <8 x i16>
136 store <8 x i16> %x, <8 x i16>* %res
140 define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
141 ; KNL-LABEL: trunc_qw_256:
143 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
144 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
145 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
146 ; KNL-NEXT: vzeroupper
149 ; SKX-LABEL: trunc_qw_256:
151 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
152 ; SKX-NEXT: vzeroupper
154 %x = trunc <4 x i64> %i to <4 x i16>
158 define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
159 ; KNL-LABEL: trunc_qw_256_mem:
161 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
162 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
163 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
164 ; KNL-NEXT: vmovq %xmm0, (%rdi)
165 ; KNL-NEXT: vzeroupper
168 ; SKX-LABEL: trunc_qw_256_mem:
170 ; SKX-NEXT: vpmovqw %ymm0, (%rdi)
171 ; SKX-NEXT: vzeroupper
173 %x = trunc <4 x i64> %i to <4 x i16>
174 store <4 x i16> %x, <4 x i16>* %res
178 define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
179 ; ALL-LABEL: trunc_qw_128:
182 %x = trunc <2 x i64> %i to <2 x i16>
186 define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
187 ; KNL-LABEL: trunc_qw_128_mem:
189 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
190 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
191 ; KNL-NEXT: vmovd %xmm0, (%rdi)
194 ; SKX-LABEL: trunc_qw_128_mem:
196 ; SKX-NEXT: vpmovqw %xmm0, (%rdi)
198 %x = trunc <2 x i64> %i to <2 x i16>
199 store <2 x i16> %x, <2 x i16>* %res
203 define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
204 ; ALL-LABEL: trunc_qd_512:
206 ; ALL-NEXT: vpmovqd %zmm0, %ymm0
208 %x = trunc <8 x i64> %i to <8 x i32>
212 define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
213 ; ALL-LABEL: trunc_qd_512_mem:
215 ; ALL-NEXT: vpmovqd %zmm0, (%rdi)
216 ; ALL-NEXT: vzeroupper
218 %x = trunc <8 x i64> %i to <8 x i32>
219 store <8 x i32> %x, <8 x i32>* %res
223 define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
224 ; KNL-LABEL: trunc_qd_256:
226 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
227 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
228 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
229 ; KNL-NEXT: vzeroupper
232 ; SKX-LABEL: trunc_qd_256:
234 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
235 ; SKX-NEXT: vzeroupper
237 %x = trunc <4 x i64> %i to <4 x i32>
241 define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
242 ; KNL-LABEL: trunc_qd_256_mem:
244 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
245 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
246 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
247 ; KNL-NEXT: vzeroupper
250 ; SKX-LABEL: trunc_qd_256_mem:
252 ; SKX-NEXT: vpmovqd %ymm0, (%rdi)
253 ; SKX-NEXT: vzeroupper
255 %x = trunc <4 x i64> %i to <4 x i32>
256 store <4 x i32> %x, <4 x i32>* %res
260 define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
261 ; ALL-LABEL: trunc_qd_128:
264 %x = trunc <2 x i64> %i to <2 x i32>
268 define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
269 ; KNL-LABEL: trunc_qd_128_mem:
271 ; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
272 ; KNL-NEXT: vmovlps %xmm0, (%rdi)
275 ; SKX-LABEL: trunc_qd_128_mem:
277 ; SKX-NEXT: vpmovqd %xmm0, (%rdi)
279 %x = trunc <2 x i64> %i to <2 x i32>
280 store <2 x i32> %x, <2 x i32>* %res
284 define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
285 ; ALL-LABEL: trunc_db_512:
287 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
288 ; ALL-NEXT: vzeroupper
290 %x = trunc <16 x i32> %i to <16 x i8>
294 define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
295 ; ALL-LABEL: trunc_db_512_mem:
297 ; ALL-NEXT: vpmovdb %zmm0, (%rdi)
298 ; ALL-NEXT: vzeroupper
300 %x = trunc <16 x i32> %i to <16 x i8>
301 store <16 x i8> %x, <16 x i8>* %res
305 define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
306 ; KNL-LABEL: trunc_db_256:
308 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
309 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
310 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
311 ; KNL-NEXT: vzeroupper
314 ; SKX-LABEL: trunc_db_256:
316 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
317 ; SKX-NEXT: vzeroupper
319 %x = trunc <8 x i32> %i to <8 x i8>
323 define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
324 ; KNL-LABEL: trunc_db_256_mem:
326 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
327 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
328 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
329 ; KNL-NEXT: vmovq %xmm0, (%rdi)
330 ; KNL-NEXT: vzeroupper
333 ; SKX-LABEL: trunc_db_256_mem:
335 ; SKX-NEXT: vpmovdb %ymm0, (%rdi)
336 ; SKX-NEXT: vzeroupper
338 %x = trunc <8 x i32> %i to <8 x i8>
339 store <8 x i8> %x, <8 x i8>* %res
343 define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
344 ; ALL-LABEL: trunc_db_128:
347 %x = trunc <4 x i32> %i to <4 x i8>
351 define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
352 ; KNL-LABEL: trunc_db_128_mem:
354 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
355 ; KNL-NEXT: vmovd %xmm0, (%rdi)
358 ; SKX-LABEL: trunc_db_128_mem:
360 ; SKX-NEXT: vpmovdb %xmm0, (%rdi)
362 %x = trunc <4 x i32> %i to <4 x i8>
363 store <4 x i8> %x, <4 x i8>* %res
367 define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
368 ; ALL-LABEL: trunc_dw_512:
370 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
372 %x = trunc <16 x i32> %i to <16 x i16>
376 define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
377 ; ALL-LABEL: trunc_dw_512_mem:
379 ; ALL-NEXT: vpmovdw %zmm0, (%rdi)
380 ; ALL-NEXT: vzeroupper
382 %x = trunc <16 x i32> %i to <16 x i16>
383 store <16 x i16> %x, <16 x i16>* %res
387 define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
388 ; KNL-LABEL: trunc_dw_256:
390 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
391 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
392 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
393 ; KNL-NEXT: vzeroupper
396 ; SKX-LABEL: trunc_dw_256:
398 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
399 ; SKX-NEXT: vzeroupper
401 %x = trunc <8 x i32> %i to <8 x i16>
405 define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
406 ; KNL-LABEL: trunc_dw_256_mem:
408 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
409 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
410 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
411 ; KNL-NEXT: vzeroupper
414 ; SKX-LABEL: trunc_dw_256_mem:
416 ; SKX-NEXT: vpmovdw %ymm0, (%rdi)
417 ; SKX-NEXT: vzeroupper
419 %x = trunc <8 x i32> %i to <8 x i16>
420 store <8 x i16> %x, <8 x i16>* %res
424 define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
425 ; KNL-LABEL: trunc_dw_128_mem:
427 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
428 ; KNL-NEXT: vmovq %xmm0, (%rdi)
431 ; SKX-LABEL: trunc_dw_128_mem:
433 ; SKX-NEXT: vpmovdw %xmm0, (%rdi)
435 %x = trunc <4 x i32> %i to <4 x i16>
436 store <4 x i16> %x, <4 x i16>* %res
440 define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
441 ; KNL-LABEL: trunc_wb_512:
443 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
444 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
445 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
446 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
447 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
450 ; SKX-LABEL: trunc_wb_512:
452 ; SKX-NEXT: vpmovwb %zmm0, %ymm0
454 %x = trunc <32 x i16> %i to <32 x i8>
458 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
459 ; KNL-LABEL: trunc_wb_512_mem:
461 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
462 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
463 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
464 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
465 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
466 ; KNL-NEXT: vmovdqa %ymm0, (%rdi)
467 ; KNL-NEXT: vzeroupper
470 ; SKX-LABEL: trunc_wb_512_mem:
472 ; SKX-NEXT: vpmovwb %zmm0, (%rdi)
473 ; SKX-NEXT: vzeroupper
475 %x = trunc <32 x i16> %i to <32 x i8>
476 store <32 x i8> %x, <32 x i8>* %res
480 define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
481 ; KNL-LABEL: trunc_wb_256:
483 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
484 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
485 ; KNL-NEXT: vzeroupper
488 ; SKX-LABEL: trunc_wb_256:
490 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
491 ; SKX-NEXT: vzeroupper
493 %x = trunc <16 x i16> %i to <16 x i8>
497 define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
498 ; KNL-LABEL: trunc_wb_256_mem:
500 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
501 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
502 ; KNL-NEXT: vzeroupper
505 ; SKX-LABEL: trunc_wb_256_mem:
507 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
508 ; SKX-NEXT: vzeroupper
510 %x = trunc <16 x i16> %i to <16 x i8>
511 store <16 x i8> %x, <16 x i8>* %res
515 define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
516 ; ALL-LABEL: trunc_wb_128:
519 %x = trunc <8 x i16> %i to <8 x i8>
523 define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
524 ; KNL-LABEL: trunc_wb_128_mem:
526 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
527 ; KNL-NEXT: vmovq %xmm0, (%rdi)
530 ; SKX-LABEL: trunc_wb_128_mem:
532 ; SKX-NEXT: vpmovwb %xmm0, (%rdi)
534 %x = trunc <8 x i16> %i to <8 x i8>
535 store <8 x i8> %x, <8 x i8>* %res
540 define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
541 ; KNL-LABEL: usat_trunc_wb_256_mem:
543 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
544 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
545 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
546 ; KNL-NEXT: vzeroupper
549 ; SKX-LABEL: usat_trunc_wb_256_mem:
551 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
552 ; SKX-NEXT: vzeroupper
554 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
555 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
556 %x6 = trunc <16 x i16> %x5 to <16 x i8>
557 store <16 x i8> %x6, <16 x i8>* %res, align 1
561 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
562 ; KNL-LABEL: usat_trunc_wb_256:
564 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
565 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
566 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
567 ; KNL-NEXT: vzeroupper
570 ; SKX-LABEL: usat_trunc_wb_256:
572 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
573 ; SKX-NEXT: vzeroupper
575 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
576 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
577 %x6 = trunc <16 x i16> %x5 to <16 x i8>
581 define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
582 ; KNL-LABEL: usat_trunc_wb_128_mem:
584 ; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
585 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
586 ; KNL-NEXT: vmovq %xmm0, (%rdi)
589 ; SKX-LABEL: usat_trunc_wb_128_mem:
591 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
593 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
594 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
595 %x6 = trunc <8 x i16> %x5 to <8 x i8>
596 store <8 x i8> %x6, <8 x i8>* %res, align 1
600 define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
601 ; ALL-LABEL: usat_trunc_db_512_mem:
603 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
604 ; ALL-NEXT: vzeroupper
606 %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
607 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
608 %x6 = trunc <16 x i32> %x5 to <16 x i8>
609 store <16 x i8> %x6, <16 x i8>* %res, align 1
613 define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
614 ; ALL-LABEL: usat_trunc_qb_512_mem:
616 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
617 ; ALL-NEXT: vzeroupper
619 %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
620 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
621 %x6 = trunc <8 x i64> %x5 to <8 x i8>
622 store <8 x i8> %x6, <8 x i8>* %res, align 1
626 define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
627 ; ALL-LABEL: usat_trunc_qd_512_mem:
629 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
630 ; ALL-NEXT: vzeroupper
632 %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
633 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
634 %x6 = trunc <8 x i64> %x5 to <8 x i32>
635 store <8 x i32> %x6, <8 x i32>* %res, align 1
639 define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
640 ; ALL-LABEL: usat_trunc_qw_512_mem:
642 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
643 ; ALL-NEXT: vzeroupper
645 %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
646 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
647 %x6 = trunc <8 x i64> %x5 to <8 x i16>
648 store <8 x i16> %x6, <8 x i16>* %res, align 1
652 define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
653 ; ALL-LABEL: usat_trunc_db_1024:
655 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
656 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
657 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
659 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
660 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
661 %x6 = trunc <32 x i32> %x5 to <32 x i8>
665 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
666 ; ALL-LABEL: usat_trunc_db_1024_mem:
668 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
669 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
670 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
671 ; ALL-NEXT: vmovdqu %ymm0, (%rdi)
672 ; ALL-NEXT: vzeroupper
674 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
675 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
676 %x6 = trunc <32 x i32> %x5 to <32 x i8>
677 store <32 x i8>%x6, <32 x i8>* %p, align 1
681 define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
682 ; ALL-LABEL: usat_trunc_dw_512:
684 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
686 %x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
687 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
688 %x6 = trunc <16 x i32> %x5 to <16 x i16>
692 define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
693 ; ALL-LABEL: usat_trunc_wb_128:
695 ; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
697 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
698 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
699 %x6 = trunc <8 x i16> %x5 to <8 x i8>
703 define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
704 ; ALL-LABEL: usat_trunc_qw_1024:
706 ; ALL-NEXT: vpmovusqw %zmm0, %xmm0
707 ; ALL-NEXT: vpmovusqw %zmm1, %xmm1
708 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
710 %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
711 %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
712 %x6 = trunc <16 x i64> %x5 to <16 x i16>
716 define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
717 ; KNL-LABEL: usat_trunc_db_256:
719 ; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
720 ; KNL-NEXT: vpminud %ymm1, %ymm0, %ymm0
721 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
722 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
723 ; KNL-NEXT: vzeroupper
726 ; SKX-LABEL: usat_trunc_db_256:
728 ; SKX-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0
729 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
730 ; SKX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
731 ; SKX-NEXT: vzeroupper
733 %tmp1 = icmp ult <8 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
734 %tmp2 = select <8 x i1> %tmp1, <8 x i32> %x, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
735 %tmp3 = trunc <8 x i32> %tmp2 to <8 x i8>
736 %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
742 ; Tests for the following unsigned saturation pattern:
744 ; %a = icmp sgt %x, C1
745 ; %b = select %a, %x, C1
746 ; %c = icmp slt %b, C2
747 ; %d = select %c, %b, C2
751 define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, <16 x i8>* %res) {
752 ; KNL-LABEL: smax_usat_trunc_wb_256_mem1:
754 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
755 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
756 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
757 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
758 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
759 ; KNL-NEXT: vzeroupper
762 ; SKX-LABEL: smax_usat_trunc_wb_256_mem1:
764 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
765 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
766 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
767 ; SKX-NEXT: vzeroupper
769 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
770 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
771 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
772 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
773 %x6 = trunc <16 x i16> %x5 to <16 x i8>
774 store <16 x i8> %x6, <16 x i8>* %res, align 1
778 ; Test for smax(smin(x, C2), C1).
779 define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, <16 x i8>* %res) {
780 ; KNL-LABEL: smax_usat_trunc_wb_256_mem2:
782 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
783 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
784 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
785 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
786 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
787 ; KNL-NEXT: vzeroupper
790 ; SKX-LABEL: smax_usat_trunc_wb_256_mem2:
792 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
793 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
794 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
795 ; SKX-NEXT: vzeroupper
797 %x1 = icmp slt <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
798 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
799 %x3 = icmp sgt <16 x i16> %x2, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
800 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
801 %x6 = trunc <16 x i16> %x5 to <16 x i8>
802 store <16 x i8> %x6, <16 x i8>* %res, align 1
806 define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
807 ; KNL-LABEL: smax_usat_trunc_wb_256:
809 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
810 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
811 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
812 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
813 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
814 ; KNL-NEXT: vzeroupper
817 ; SKX-LABEL: smax_usat_trunc_wb_256:
819 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
820 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
821 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
822 ; SKX-NEXT: vzeroupper
824 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
825 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
826 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
827 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
828 %x6 = trunc <16 x i16> %x5 to <16 x i8>
832 define void @smax_usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
833 ; KNL-LABEL: smax_usat_trunc_wb_128_mem:
835 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
836 ; KNL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
837 ; KNL-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
838 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
839 ; KNL-NEXT: vmovq %xmm0, (%rdi)
842 ; SKX-LABEL: smax_usat_trunc_wb_128_mem:
844 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
845 ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
846 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
848 %x1 = icmp sgt <8 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
849 %x2 = select <8 x i1> %x1, <8 x i16> %i, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
850 %x3 = icmp slt <8 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
851 %x5 = select <8 x i1> %x3, <8 x i16> %x2, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
852 %x6 = trunc <8 x i16> %x5 to <8 x i8>
853 store <8 x i8> %x6, <8 x i8>* %res, align 1
857 define void @smax_usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
858 ; ALL-LABEL: smax_usat_trunc_db_512_mem:
860 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
861 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
862 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
863 ; ALL-NEXT: vzeroupper
865 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
866 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
867 %x3 = icmp slt <16 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
868 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
869 %x6 = trunc <16 x i32> %x5 to <16 x i8>
870 store <16 x i8> %x6, <16 x i8>* %res, align 1
874 define void @smax_usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
875 ; ALL-LABEL: smax_usat_trunc_qb_512_mem:
877 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
878 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
879 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
880 ; ALL-NEXT: vzeroupper
882 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
883 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
884 %x3 = icmp slt <8 x i64> %x2, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
885 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
886 %x6 = trunc <8 x i64> %x5 to <8 x i8>
887 store <8 x i8> %x6, <8 x i8>* %res, align 1
891 define void @smax_usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
892 ; ALL-LABEL: smax_usat_trunc_qd_512_mem:
894 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
895 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
896 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
897 ; ALL-NEXT: vzeroupper
899 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
900 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
901 %x3 = icmp slt <8 x i64> %x2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
902 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
903 %x6 = trunc <8 x i64> %x5 to <8 x i32>
904 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Unsigned-saturating truncate with a memory destination: <8 x i64> -> <8 x i16>.
; Every lane of %i is clamped to [0, 65535] using signed compare+select pairs,
; truncated to i16, and stored to %res with align 1. The assertions below
; expect both llc configurations to emit vpmaxsq against zero followed by a
; vpmovusqw store.
908 define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
909 ; ALL-LABEL: smax_usat_trunc_qw_512_mem:
911 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
912 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
913 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
914 ; ALL-NEXT: vzeroupper
; Lower clamp: per-lane signed max(%i, 0).
916 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
917 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
; Upper clamp: per-lane signed min(%x2, 65535).
918 %x3 = icmp slt <8 x i64> %x2, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
919 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
; Narrow the clamped lanes to i16 and write them out.
920 %x6 = trunc <8 x i64> %x5 to <8 x i16>
921 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Unsigned-saturating truncate of a 1024-bit input: <32 x i32> -> <32 x i8>.
; Every lane of %i is clamped to [0, 255] using signed compare+select pairs and
; then truncated to i8. The input arrives in two zmm registers, so the
; assertions below expect each half to get its own vpmaxsd and vpmovusdb, with
; the two 128-bit results joined by vinserti128.
925 define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
926 ; ALL-LABEL: smax_usat_trunc_db_1024:
928 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
929 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
930 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
931 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
932 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
933 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; Lower clamp: per-lane signed max(%i, 0).
935 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
936 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Upper clamp: per-lane signed min(%x2, 255).
937 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
938 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Narrow the clamped lanes to i8.
939 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Unsigned-saturating truncate of a 1024-bit input with a memory destination:
; <32 x i32> -> <32 x i8>. Every lane of %i is clamped to [0, 255] using signed
; compare+select pairs, truncated to i8, and stored to %p with align 1. The
; assertions below expect each zmm half to be processed by its own vpmaxsd and
; register-form vpmovusdb, the halves to be merged with vinserti128, and the
; merged 256-bit value to be written with an unaligned vmovdqu.
943 define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
944 ; ALL-LABEL: smax_usat_trunc_db_1024_mem:
946 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
947 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
948 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
949 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
950 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
951 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
952 ; ALL-NEXT: vmovdqu %ymm0, (%rdi)
953 ; ALL-NEXT: vzeroupper
; Lower clamp: per-lane signed max(%i, 0).
955 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
956 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Upper clamp: per-lane signed min(%x2, 255).
957 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
958 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Narrow the clamped lanes to i8 and write them out.
959 %x6 = trunc <32 x i32> %x5 to <32 x i8>
960 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Unsigned-saturating truncate in registers: <16 x i32> -> <16 x i16>.
; Every lane of %i is clamped to [0, 65535] using signed compare+select pairs
; and then truncated to i16. The assertions below expect both llc
; configurations to emit vpmaxsd against zero followed by a zmm-to-ymm
; vpmovusdw.
964 define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) {
965 ; ALL-LABEL: smax_usat_trunc_dw_512:
967 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
968 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
969 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
; Lower clamp: per-lane signed max(%i, 0).
971 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
972 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Upper clamp: per-lane signed min(%x2, 65535).
973 %x3 = icmp slt <16 x i32> %x2, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
974 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
; Narrow the clamped lanes to i16.
975 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Negative test: a clamp-then-truncate of <16 x i16> -> <16 x i8> whose bounds
; do not describe an unsigned-saturation range. The lower bound is 0 and the
; upper bound is i16 -1 under a *signed* slt, i.e. an upper bound that lies
; below the lower bound. The assertions below show the max/min being kept as
; vpmaxsw/vpminsw (with an all-ones register as the min operand) and the
; narrowing done by a plain truncate rather than a vpmovus* instruction:
; vpmovzxwd + vpmovdb in the KNL run, vpmovwb in the SKX run.
979 define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
980 ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
982 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
983 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
984 ; KNL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
985 ; KNL-NEXT: vpminsw %ymm1, %ymm0, %ymm0
986 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
987 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
988 ; KNL-NEXT: vzeroupper
991 ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
993 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
994 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
995 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
996 ; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0
997 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
998 ; SKX-NEXT: vzeroupper
; Lower clamp: per-lane signed max(%i, 0).
1000 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
1001 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
; Upper clamp: per-lane signed min(%x2, -1); the bound is negative as a signed i16.
1002 %x3 = icmp slt <16 x i16> %x2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1003 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; Narrow to i8 and write the result out with align 1.
1004 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1005 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Negative test: a clamp-then-truncate of <16 x i16> -> <16 x i8> whose bounds
; are both negative (signed max with -10, then signed min with -5), so the
; clamped range is not [0, 255]. The assertions below show vpmaxsw/vpminsw
; taking their bounds from RIP-relative memory operands and the narrowing done
; by a plain truncate rather than a vpmovus* instruction: vpmovzxwd + vpmovdb
; in the KNL run, vpmovwb in the SKX run.
1009 define void @negative_test2_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
1010 ; KNL-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1012 ; KNL-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1013 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1014 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1015 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
1016 ; KNL-NEXT: vzeroupper
1019 ; SKX-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1021 ; SKX-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1022 ; SKX-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1023 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1024 ; SKX-NEXT: vzeroupper
; Lower clamp: per-lane signed max(%i, -10).
1026 %x1 = icmp sgt <16 x i16> %i, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1027 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
; Upper clamp: per-lane signed min(%x2, -5).
1028 %x3 = icmp slt <16 x i16> %x2, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1029 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
; Narrow to i8 and write the result out with align 1.
1030 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1031 store <16 x i8> %x6, <16 x i8>* %res, align 1