1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,SKX
; Attribute group #0: functions that reference it are marked nounwind.
5 attributes #0 = { nounwind }
; Truncate <16 x i32> to <16 x i8> and return it; both llc configurations are
; expected to emit a single vpmovdb from zmm0 into xmm0.
7 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
8 ; ALL-LABEL: trunc_16x32_to_16x8:
10 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
11 ; ALL-NEXT: vzeroupper
13 %x = trunc <16 x i32> %i to <16 x i8>
; Truncate <8 x i64> to <8 x i16> and return it; both llc configurations are
; expected to emit a single vpmovqw from zmm0 into xmm0.
17 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
18 ; ALL-LABEL: trunc_8x64_to_8x16:
20 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
21 ; ALL-NEXT: vzeroupper
23 %x = trunc <8 x i64> %i to <8 x i16>
; Truncate <16 x i32> to <16 x i16> and return it; both llc configurations are
; expected to emit vpmovdw from zmm0 into ymm0.
27 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
28 ; ALL-LABEL: trunc_v16i32_to_v16i16:
30 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
32 %1 = trunc <16 x i32> %x to <16 x i16>
; Truncate <8 x i64> to <8 x i8> and return it in a register. Note that the
; checks expect vpmovqw (a word-element truncate), not vpmovqb, for the
; register-returning form.
36 define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
37 ; ALL-LABEL: trunc_qb_512:
39 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
40 ; ALL-NEXT: vzeroupper
42 %x = trunc <8 x i64> %i to <8 x i8>
; Truncate <8 x i64> to <8 x i8> and store the result to %res; both llc
; configurations are expected to emit vpmovqb writing directly to memory.
46 define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
47 ; ALL-LABEL: trunc_qb_512_mem:
49 ; ALL-NEXT: vpmovqb %zmm0, (%rdi)
50 ; ALL-NEXT: vzeroupper
52 %x = trunc <8 x i64> %i to <8 x i8>
53 store <8 x i8> %x, <8 x i8>* %res
; Truncate <4 x i64> to <4 x i8> and return it in a register. The KNL checks
; (AVX512F only) expect the input to be treated as a zmm register and truncated
; with vpmovqd; the SKX checks (AVX512VL/BW/DQ enabled) expect vpmovqd on ymm0.
57 define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
58 ; KNL-LABEL: trunc_qb_256:
60 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
61 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
62 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
63 ; KNL-NEXT: vzeroupper
66 ; SKX-LABEL: trunc_qb_256:
68 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
69 ; SKX-NEXT: vzeroupper
71 %x = trunc <4 x i64> %i to <4 x i8>
; Truncate <4 x i64> to <4 x i8> and store the result to %res. The KNL checks
; (AVX512F only) expect vpmovqd on zmm0, a vpshufb to gather bytes 0,4,8,12 and
; a 32-bit vmovd store; the SKX checks (AVX512VL/BW/DQ enabled) expect a single
; vpmovqb from ymm0 to memory.
75 define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
76 ; KNL-LABEL: trunc_qb_256_mem:
78 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
79 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
80 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
81 ; KNL-NEXT: vmovd %xmm0, (%rdi)
82 ; KNL-NEXT: vzeroupper
85 ; SKX-LABEL: trunc_qb_256_mem:
87 ; SKX-NEXT: vpmovqb %ymm0, (%rdi)
88 ; SKX-NEXT: vzeroupper
90 %x = trunc <4 x i64> %i to <4 x i8>
91 store <4 x i8> %x, <4 x i8>* %res
; Truncate <2 x i64> to <2 x i8> and return it in a register; checked under the
; prefix shared by both llc configurations.
95 define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
96 ; ALL-LABEL: trunc_qb_128:
99 %x = trunc <2 x i64> %i to <2 x i8>
; Truncate <2 x i64> to <2 x i8> and store the result to %res. The KNL checks
; (AVX512F only) expect a vpshufb gathering bytes 0 and 8 followed by a 16-bit
; vpextrw store; the SKX checks (AVX512VL/BW/DQ enabled) expect a single
; vpmovqb from xmm0 to memory.
103 define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
104 ; KNL-LABEL: trunc_qb_128_mem:
106 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
107 ; KNL-NEXT: vpextrw $0, %xmm0, (%rdi)
110 ; SKX-LABEL: trunc_qb_128_mem:
112 ; SKX-NEXT: vpmovqb %xmm0, (%rdi)
114 %x = trunc <2 x i64> %i to <2 x i8>
115 store <2 x i8> %x, <2 x i8>* %res
; Truncate <8 x i64> to <8 x i16> and return it; both llc configurations are
; expected to emit vpmovqw from zmm0 into xmm0.
119 define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
120 ; ALL-LABEL: trunc_qw_512:
122 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
123 ; ALL-NEXT: vzeroupper
125 %x = trunc <8 x i64> %i to <8 x i16>
; Truncate <8 x i64> to <8 x i16> and store the result to %res; both llc
; configurations are expected to emit vpmovqw writing directly to memory.
129 define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
130 ; ALL-LABEL: trunc_qw_512_mem:
132 ; ALL-NEXT: vpmovqw %zmm0, (%rdi)
133 ; ALL-NEXT: vzeroupper
135 %x = trunc <8 x i64> %i to <8 x i16>
136 store <8 x i16> %x, <8 x i16>* %res
; Truncate <4 x i64> to <4 x i16> and return it in a register. The KNL checks
; (AVX512F only) expect the input to be treated as a zmm register and truncated
; with vpmovqd; the SKX checks (AVX512VL/BW/DQ enabled) expect vpmovqd on ymm0.
140 define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
141 ; KNL-LABEL: trunc_qw_256:
143 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
144 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
145 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
146 ; KNL-NEXT: vzeroupper
149 ; SKX-LABEL: trunc_qw_256:
151 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
152 ; SKX-NEXT: vzeroupper
154 %x = trunc <4 x i64> %i to <4 x i16>
; Truncate <4 x i64> to <4 x i16> and store the result to %res. The KNL checks
; (AVX512F only) expect vpmovqd on zmm0, a vpshufb that packs the low word of
; each dword into the low 8 bytes, and a 64-bit vmovq store; the SKX checks
; (AVX512VL/BW/DQ enabled) expect a single vpmovqw from ymm0 to memory.
158 define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
159 ; KNL-LABEL: trunc_qw_256_mem:
161 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
162 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
163 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
164 ; KNL-NEXT: vmovq %xmm0, (%rdi)
165 ; KNL-NEXT: vzeroupper
168 ; SKX-LABEL: trunc_qw_256_mem:
170 ; SKX-NEXT: vpmovqw %ymm0, (%rdi)
171 ; SKX-NEXT: vzeroupper
173 %x = trunc <4 x i64> %i to <4 x i16>
174 store <4 x i16> %x, <4 x i16>* %res
; Truncate <2 x i64> to <2 x i16> and return it in a register; checked under
; the prefix shared by both llc configurations.
178 define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
179 ; ALL-LABEL: trunc_qw_128:
182 %x = trunc <2 x i64> %i to <2 x i16>
; Truncate <2 x i64> to <2 x i16> and store the result to %res. The KNL checks
; (AVX512F only) expect a vpshufd + vpshuflw pair followed by a 32-bit vmovd
; store; the SKX checks (AVX512VL/BW/DQ enabled) expect a single vpmovqw from
; xmm0 to memory.
186 define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
187 ; KNL-LABEL: trunc_qw_128_mem:
189 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
190 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
191 ; KNL-NEXT: vmovd %xmm0, (%rdi)
194 ; SKX-LABEL: trunc_qw_128_mem:
196 ; SKX-NEXT: vpmovqw %xmm0, (%rdi)
198 %x = trunc <2 x i64> %i to <2 x i16>
199 store <2 x i16> %x, <2 x i16>* %res
; Truncate <8 x i64> to <8 x i32> and return it; both llc configurations are
; expected to emit vpmovqd from zmm0 into ymm0.
203 define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
204 ; ALL-LABEL: trunc_qd_512:
206 ; ALL-NEXT: vpmovqd %zmm0, %ymm0
208 %x = trunc <8 x i64> %i to <8 x i32>
; Truncate <8 x i64> to <8 x i32> and store the result to %res; both llc
; configurations are expected to emit vpmovqd writing directly to memory.
212 define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
213 ; ALL-LABEL: trunc_qd_512_mem:
215 ; ALL-NEXT: vpmovqd %zmm0, (%rdi)
216 ; ALL-NEXT: vzeroupper
218 %x = trunc <8 x i64> %i to <8 x i32>
219 store <8 x i32> %x, <8 x i32>* %res
; Truncate <4 x i64> to <4 x i32> and return it in a register. The KNL checks
; (AVX512F only) expect the input to be treated as a zmm register and truncated
; with vpmovqd; the SKX checks (AVX512VL/BW/DQ enabled) expect vpmovqd on ymm0.
223 define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
224 ; KNL-LABEL: trunc_qd_256:
226 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
227 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
228 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
229 ; KNL-NEXT: vzeroupper
232 ; SKX-LABEL: trunc_qd_256:
234 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
235 ; SKX-NEXT: vzeroupper
237 %x = trunc <4 x i64> %i to <4 x i32>
; Truncate <4 x i64> to <4 x i32> and store the result to %res. The KNL checks
; (AVX512F only) expect vpmovqd on zmm0 followed by a vmovdqa store of xmm0;
; the SKX checks (AVX512VL/BW/DQ enabled) expect a single vpmovqd from ymm0 to
; memory.
241 define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
242 ; KNL-LABEL: trunc_qd_256_mem:
244 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
245 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
246 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
247 ; KNL-NEXT: vzeroupper
250 ; SKX-LABEL: trunc_qd_256_mem:
252 ; SKX-NEXT: vpmovqd %ymm0, (%rdi)
253 ; SKX-NEXT: vzeroupper
255 %x = trunc <4 x i64> %i to <4 x i32>
256 store <4 x i32> %x, <4 x i32>* %res
; Truncate <2 x i64> to <2 x i32> and return it in a register; checked under
; the prefix shared by both llc configurations.
260 define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
261 ; ALL-LABEL: trunc_qd_128:
264 %x = trunc <2 x i64> %i to <2 x i32>
; Truncate <2 x i64> to <2 x i32> and store the result to %res. The KNL checks
; (AVX512F only) expect a vpermilps shuffle followed by a 64-bit vmovlps store;
; the SKX checks (AVX512VL/BW/DQ enabled) expect a single vpmovqd from xmm0 to
; memory.
268 define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
269 ; KNL-LABEL: trunc_qd_128_mem:
271 ; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
272 ; KNL-NEXT: vmovlps %xmm0, (%rdi)
275 ; SKX-LABEL: trunc_qd_128_mem:
277 ; SKX-NEXT: vpmovqd %xmm0, (%rdi)
279 %x = trunc <2 x i64> %i to <2 x i32>
280 store <2 x i32> %x, <2 x i32>* %res
; Truncate <16 x i32> to <16 x i8> and return it; both llc configurations are
; expected to emit vpmovdb from zmm0 into xmm0.
284 define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
285 ; ALL-LABEL: trunc_db_512:
287 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
288 ; ALL-NEXT: vzeroupper
290 %x = trunc <16 x i32> %i to <16 x i8>
; Truncate <16 x i32> to <16 x i8> and store the result to %res; both llc
; configurations are expected to emit vpmovdb writing directly to memory.
294 define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
295 ; ALL-LABEL: trunc_db_512_mem:
297 ; ALL-NEXT: vpmovdb %zmm0, (%rdi)
298 ; ALL-NEXT: vzeroupper
300 %x = trunc <16 x i32> %i to <16 x i8>
301 store <16 x i8> %x, <16 x i8>* %res
; Truncate <8 x i32> to <8 x i8> and return it in a register. The KNL checks
; (AVX512F only) expect the input to be treated as a zmm register and truncated
; with vpmovdw; the SKX checks (AVX512VL/BW/DQ enabled) expect vpmovdw on ymm0.
305 define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
306 ; KNL-LABEL: trunc_db_256:
308 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
309 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
310 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
311 ; KNL-NEXT: vzeroupper
314 ; SKX-LABEL: trunc_db_256:
316 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
317 ; SKX-NEXT: vzeroupper
319 %x = trunc <8 x i32> %i to <8 x i8>
; Truncate <8 x i32> to <8 x i8> and store the result to %res. The KNL checks
; (AVX512F only) expect vpmovdw on zmm0, a vpshufb selecting the even bytes and
; a 64-bit vmovq store; the SKX checks (AVX512VL/BW/DQ enabled) expect a single
; vpmovdb from ymm0 to memory.
323 define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
324 ; KNL-LABEL: trunc_db_256_mem:
326 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
327 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
328 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
329 ; KNL-NEXT: vmovq %xmm0, (%rdi)
330 ; KNL-NEXT: vzeroupper
333 ; SKX-LABEL: trunc_db_256_mem:
335 ; SKX-NEXT: vpmovdb %ymm0, (%rdi)
336 ; SKX-NEXT: vzeroupper
338 %x = trunc <8 x i32> %i to <8 x i8>
339 store <8 x i8> %x, <8 x i8>* %res
; Truncate <4 x i32> to <4 x i8> and return it in a register; checked under the
; prefix shared by both llc configurations.
343 define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
344 ; ALL-LABEL: trunc_db_128:
347 %x = trunc <4 x i32> %i to <4 x i8>
; Truncate <4 x i32> to <4 x i8> and store the result to %res. The KNL checks
; (AVX512F only) expect a vpshufb gathering bytes 0,4,8,12 followed by a 32-bit
; vmovd store; the SKX checks (AVX512VL/BW/DQ enabled) expect a single vpmovdb
; from xmm0 to memory.
351 define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
352 ; KNL-LABEL: trunc_db_128_mem:
354 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
355 ; KNL-NEXT: vmovd %xmm0, (%rdi)
358 ; SKX-LABEL: trunc_db_128_mem:
360 ; SKX-NEXT: vpmovdb %xmm0, (%rdi)
362 %x = trunc <4 x i32> %i to <4 x i8>
363 store <4 x i8> %x, <4 x i8>* %res
; Truncate <16 x i32> to <16 x i16> and return it; both llc configurations are
; expected to emit vpmovdw from zmm0 into ymm0.
367 define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
368 ; ALL-LABEL: trunc_dw_512:
370 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
372 %x = trunc <16 x i32> %i to <16 x i16>
; Truncate <16 x i32> to <16 x i16> and store the result to %res; both llc
; configurations are expected to emit vpmovdw writing directly to memory.
376 define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
377 ; ALL-LABEL: trunc_dw_512_mem:
379 ; ALL-NEXT: vpmovdw %zmm0, (%rdi)
380 ; ALL-NEXT: vzeroupper
382 %x = trunc <16 x i32> %i to <16 x i16>
383 store <16 x i16> %x, <16 x i16>* %res
; Truncate <8 x i32> to <8 x i16> and return it in a register. The KNL checks
; (AVX512F only) expect the input to be treated as a zmm register and truncated
; with vpmovdw; the SKX checks (AVX512VL/BW/DQ enabled) expect vpmovdw on ymm0.
387 define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
388 ; KNL-LABEL: trunc_dw_256:
390 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
391 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
392 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
393 ; KNL-NEXT: vzeroupper
396 ; SKX-LABEL: trunc_dw_256:
398 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
399 ; SKX-NEXT: vzeroupper
401 %x = trunc <8 x i32> %i to <8 x i16>
; Truncate <8 x i32> to <8 x i16> and store the result to %res. The KNL checks
; (AVX512F only) expect vpmovdw on zmm0 followed by a vmovdqa store of xmm0;
; the SKX checks (AVX512VL/BW/DQ enabled) expect a single vpmovdw from ymm0 to
; memory.
405 define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
406 ; KNL-LABEL: trunc_dw_256_mem:
408 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
409 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
410 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
411 ; KNL-NEXT: vzeroupper
414 ; SKX-LABEL: trunc_dw_256_mem:
416 ; SKX-NEXT: vpmovdw %ymm0, (%rdi)
417 ; SKX-NEXT: vzeroupper
419 %x = trunc <8 x i32> %i to <8 x i16>
420 store <8 x i16> %x, <8 x i16>* %res
; Truncate <4 x i32> to <4 x i16> and store the result to %res. The KNL checks
; (AVX512F only) expect a vpshufb that packs the low word of each dword into
; the low 8 bytes followed by a 64-bit vmovq store; the SKX checks
; (AVX512VL/BW/DQ enabled) expect a single vpmovdw from xmm0 to memory.
424 define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
425 ; KNL-LABEL: trunc_dw_128_mem:
427 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
428 ; KNL-NEXT: vmovq %xmm0, (%rdi)
431 ; SKX-LABEL: trunc_dw_128_mem:
433 ; SKX-NEXT: vpmovdw %xmm0, (%rdi)
435 %x = trunc <4 x i32> %i to <4 x i16>
436 store <4 x i16> %x, <4 x i16>* %res
; Truncate <32 x i16> to <32 x i8> and return it. The KNL checks (AVX512F only)
; expect each 256-bit half to be zero-extended to dwords (vpmovzxwd), truncated
; with vpmovdb, and the halves recombined with vinserti128; the SKX checks
; (AVX512VL/BW/DQ enabled) expect a single vpmovwb from zmm0 into ymm0.
440 define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
441 ; KNL-LABEL: trunc_wb_512:
443 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
444 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
445 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
446 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
447 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
450 ; SKX-LABEL: trunc_wb_512:
452 ; SKX-NEXT: vpmovwb %zmm0, %ymm0
454 %x = trunc <32 x i16> %i to <32 x i8>
; Truncate <32 x i16> to <32 x i8> and store the result to %res. The KNL checks
; (AVX512F only) expect each 256-bit half to be zero-extended to dwords and
; written with its own vpmovdb store (upper half at offset 16, lower at 0); the
; SKX checks (AVX512VL/BW/DQ enabled) expect a single vpmovwb to memory.
458 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
459 ; KNL-LABEL: trunc_wb_512_mem:
461 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
462 ; KNL-NEXT: vpmovdb %zmm1, 16(%rdi)
463 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
464 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
465 ; KNL-NEXT: vzeroupper
468 ; SKX-LABEL: trunc_wb_512_mem:
470 ; SKX-NEXT: vpmovwb %zmm0, (%rdi)
471 ; SKX-NEXT: vzeroupper
473 %x = trunc <32 x i16> %i to <32 x i8>
474 store <32 x i8> %x, <32 x i8>* %res
; Truncate <16 x i16> to <16 x i8> and return it. The KNL checks (AVX512F only)
; expect a zero-extension to dwords (vpmovzxwd) followed by vpmovdb; the SKX
; checks (AVX512VL/BW/DQ enabled) expect a single vpmovwb from ymm0 into xmm0.
478 define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
479 ; KNL-LABEL: trunc_wb_256:
481 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
482 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
483 ; KNL-NEXT: vzeroupper
486 ; SKX-LABEL: trunc_wb_256:
488 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
489 ; SKX-NEXT: vzeroupper
491 %x = trunc <16 x i16> %i to <16 x i8>
; Truncate <16 x i16> to <16 x i8> and store the result to %res. The KNL checks
; (AVX512F only) expect a zero-extension to dwords (vpmovzxwd) followed by a
; vpmovdb store; the SKX checks (AVX512VL/BW/DQ enabled) expect a single
; vpmovwb from ymm0 to memory.
495 define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
496 ; KNL-LABEL: trunc_wb_256_mem:
498 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
499 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
500 ; KNL-NEXT: vzeroupper
503 ; SKX-LABEL: trunc_wb_256_mem:
505 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
506 ; SKX-NEXT: vzeroupper
508 %x = trunc <16 x i16> %i to <16 x i8>
509 store <16 x i8> %x, <16 x i8>* %res
; Truncate <8 x i16> to <8 x i8> and return it in a register; checked under the
; prefix shared by both llc configurations.
513 define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
514 ; ALL-LABEL: trunc_wb_128:
517 %x = trunc <8 x i16> %i to <8 x i8>
; Truncate <8 x i16> to <8 x i8> and store the result to %res. The KNL checks
; (AVX512F only) expect a vpshufb selecting the even bytes followed by a 64-bit
; vmovq store; the SKX checks (AVX512VL/BW/DQ enabled) expect a single vpmovwb
; from xmm0 to memory.
521 define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
522 ; KNL-LABEL: trunc_wb_128_mem:
524 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
525 ; KNL-NEXT: vmovq %xmm0, (%rdi)
528 ; SKX-LABEL: trunc_wb_128_mem:
530 ; SKX-NEXT: vpmovwb %xmm0, (%rdi)
532 %x = trunc <8 x i16> %i to <8 x i8>
533 store <8 x i8> %x, <8 x i8>* %res
; Unsigned-saturating truncate: clamp each <16 x i16> lane to at most 255 with
; an unsigned compare/select, truncate to i8 and store with align 1. The KNL
; checks (AVX512F only) expect vpminuw + vpmovzxwd + a vpmovdb store; the SKX
; checks (AVX512VL/BW/DQ enabled) expect a single vpmovuswb store.
538 define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
539 ; KNL-LABEL: usat_trunc_wb_256_mem:
541 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
542 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
543 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
544 ; KNL-NEXT: vzeroupper
547 ; SKX-LABEL: usat_trunc_wb_256_mem:
549 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
550 ; SKX-NEXT: vzeroupper
552 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
553 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
554 %x6 = trunc <16 x i16> %x5 to <16 x i8>
555 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating truncate returned in a register: clamp each <16 x i16>
; lane to at most 255 with an unsigned compare/select, then truncate to i8. The
; KNL checks (AVX512F only) expect vpminuw + vpmovzxwd + vpmovdb; the SKX checks
; (AVX512VL/BW/DQ enabled) expect a single vpmovuswb from ymm0 into xmm0.
559 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
560 ; KNL-LABEL: usat_trunc_wb_256:
562 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
563 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
564 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
565 ; KNL-NEXT: vzeroupper
568 ; SKX-LABEL: usat_trunc_wb_256:
570 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
571 ; SKX-NEXT: vzeroupper
573 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
574 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
575 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Unsigned-saturating truncate: clamp each <8 x i16> lane to at most 255 with
; an unsigned compare/select, truncate to i8 and store with align 1. The KNL
; checks (AVX512F only) expect vpminuw + vpackuswb + a 64-bit vmovq store; the
; SKX checks (AVX512VL/BW/DQ enabled) expect a single vpmovuswb store.
579 define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
580 ; KNL-LABEL: usat_trunc_wb_128_mem:
582 ; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
583 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
584 ; KNL-NEXT: vmovq %xmm0, (%rdi)
587 ; SKX-LABEL: usat_trunc_wb_128_mem:
589 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
591 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
592 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
593 %x6 = trunc <8 x i16> %x5 to <8 x i8>
594 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating truncate: clamp each <16 x i32> lane to at most 255 with
; an unsigned compare/select, truncate to i8 and store with align 1. Both llc
; configurations are expected to fold this into a single vpmovusdb store.
598 define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
599 ; ALL-LABEL: usat_trunc_db_512_mem:
601 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
602 ; ALL-NEXT: vzeroupper
604 %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
605 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
606 %x6 = trunc <16 x i32> %x5 to <16 x i8>
607 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating truncate: clamp each <8 x i64> lane to at most 255 with
; an unsigned compare/select, truncate to i8 and store with align 1. Both llc
; configurations are expected to fold this into a single vpmovusqb store.
611 define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
612 ; ALL-LABEL: usat_trunc_qb_512_mem:
614 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
615 ; ALL-NEXT: vzeroupper
617 %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
618 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
619 %x6 = trunc <8 x i64> %x5 to <8 x i8>
620 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating truncate: clamp each <8 x i64> lane to at most
; 4294967295 with an unsigned compare/select, truncate to i32 and store with
; align 1. Both llc configurations are expected to fold this into a single
; vpmovusqd store.
624 define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
625 ; ALL-LABEL: usat_trunc_qd_512_mem:
627 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
628 ; ALL-NEXT: vzeroupper
630 %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
631 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
632 %x6 = trunc <8 x i64> %x5 to <8 x i32>
633 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Unsigned-saturating truncate: clamp each <8 x i64> lane to at most 65535 with
; an unsigned compare/select, truncate to i16 and store with align 1. Both llc
; configurations are expected to fold this into a single vpmovusqw store.
637 define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
638 ; ALL-LABEL: usat_trunc_qw_512_mem:
640 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
641 ; ALL-NEXT: vzeroupper
643 %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
644 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
645 %x6 = trunc <8 x i64> %x5 to <8 x i16>
646 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Unsigned-saturating truncate of a vector wider than one zmm register: clamp
; each <32 x i32> lane to at most 255 (unsigned compare/select) and truncate to
; i8. Both llc configurations are expected to emit one vpmovusdb per 512-bit
; half and join the two results with vinserti128.
650 define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
651 ; ALL-LABEL: usat_trunc_db_1024:
653 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
654 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
655 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
657 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
658 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
659 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Unsigned-saturating truncate of a vector wider than one zmm register, stored
; to %p with align 1: clamp each <32 x i32> lane to at most 255 (unsigned
; compare/select) and truncate to i8. Both llc configurations are expected to
; emit one vpmovusdb per 512-bit half followed by two unaligned 128-bit stores.
663 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
664 ; ALL-LABEL: usat_trunc_db_1024_mem:
666 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
667 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
668 ; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
669 ; ALL-NEXT: vmovdqu %xmm0, (%rdi)
670 ; ALL-NEXT: vzeroupper
672 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
673 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
674 %x6 = trunc <32 x i32> %x5 to <32 x i8>
675 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Unsigned-saturating truncate returned in a register: clamp each <16 x i32>
; lane to at most 65535 (unsigned compare/select) and truncate to i16. Both llc
; configurations are expected to emit a single vpmovusdw.
679 define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
680 ; ALL-LABEL: usat_trunc_dw_512:
682 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
684 %x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
685 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
686 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Unsigned-saturating truncate returned in a register: clamp each <8 x i16>
; lane to at most 255 (unsigned compare/select) and truncate to i8. The checks
; visible here expect the clamp to be emitted as vpminuw in both configurations.
690 define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
691 ; ALL-LABEL: usat_trunc_wb_128:
693 ; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
695 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
696 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
697 %x6 = trunc <8 x i16> %x5 to <8 x i8>
; Unsigned-saturating truncate of a vector wider than one zmm register: clamp
; each <16 x i64> lane to at most 65535 (unsigned compare/select) and truncate
; to i16. Both llc configurations are expected to emit one vpmovusqw per
; 512-bit half and join the two results with vinserti128.
701 define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
702 ; ALL-LABEL: usat_trunc_qw_1024:
704 ; ALL-NEXT: vpmovusqw %zmm0, %xmm0
705 ; ALL-NEXT: vpmovusqw %zmm1, %xmm1
706 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
708 %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
709 %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
710 %x6 = trunc <16 x i64> %x5 to <16 x i16>
; Clamp each <8 x i32> lane to at most 255 (unsigned compare/select), truncate
; to <8 x i8>, then widen to <16 x i8> with a shufflevector whose upper eight
; mask indices (8..15) select from the undef second operand. Neither
; configuration is expected to use a saturating vpmovus* here: both check for
; vpminud + vpmovdw + vpackuswb (KNL materialises the constant via
; vpbroadcastd, SKX uses an embedded {1to8} broadcast operand).
714 define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
715 ; KNL-LABEL: usat_trunc_db_256:
717 ; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
718 ; KNL-NEXT: vpminud %ymm1, %ymm0, %ymm0
719 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
720 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
721 ; KNL-NEXT: vzeroupper
724 ; SKX-LABEL: usat_trunc_db_256:
726 ; SKX-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0
727 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
728 ; SKX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
729 ; SKX-NEXT: vzeroupper
731 %tmp1 = icmp ult <8 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
732 %tmp2 = select <8 x i1> %tmp1, <8 x i32> %x, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
733 %tmp3 = trunc <8 x i32> %tmp2 to <8 x i8>
734 %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
740 ; Tests for the following unsigned saturation pattern:
742 ; %a = icmp sgt %x, C1
743 ; %b = select %a, %x, C1
744 ; %c = icmp slt %b, C2
745 ; %d = select %c, %b, C2
; Signed-max with 0 followed by signed-min with 255 on <16 x i16>, then
; truncate to i8 and store with align 1. The KNL checks (AVX512F only) expect
; vpmaxsw + vpminsw + vpmovzxwd + a vpmovdb store; the SKX checks
; (AVX512VL/BW/DQ enabled) expect the upper clamp and truncate to fold into
; vpmovuswb, leaving only vpmaxsw against zero in front of it.
749 define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, <16 x i8>* %res) {
750 ; KNL-LABEL: smax_usat_trunc_wb_256_mem1:
752 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
753 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
754 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
755 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
756 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
757 ; KNL-NEXT: vzeroupper
760 ; SKX-LABEL: smax_usat_trunc_wb_256_mem1:
762 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
763 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
764 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
765 ; SKX-NEXT: vzeroupper
767 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
768 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
769 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
770 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
771 %x6 = trunc <16 x i16> %x5 to <16 x i8>
772 store <16 x i8> %x6, <16 x i8>* %res, align 1
776 ; Test for smax(smin(x, C2), C1).
; Here the signed-min with 255 is applied first and the signed-max with 0
; second, on <16 x i16>; the result is truncated to i8 and stored with align 1.
; The KNL checks (AVX512F only) keep that order (vpminsw, then vpmaxsw) before
; vpmovzxwd + a vpmovdb store; the SKX checks (AVX512VL/BW/DQ enabled) expect
; vpmaxsw against zero followed by a vpmovuswb store.
777 define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, <16 x i8>* %res) {
778 ; KNL-LABEL: smax_usat_trunc_wb_256_mem2:
780 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
781 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
782 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
783 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
784 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
785 ; KNL-NEXT: vzeroupper
788 ; SKX-LABEL: smax_usat_trunc_wb_256_mem2:
790 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
791 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
792 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
793 ; SKX-NEXT: vzeroupper
795 %x1 = icmp slt <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
796 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
797 %x3 = icmp sgt <16 x i16> %x2, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
798 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
799 %x6 = trunc <16 x i16> %x5 to <16 x i8>
800 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Register-returning variant: signed-max with 0 followed by signed-min with 255
; on <16 x i16>, then truncate to i8. The KNL checks (AVX512F only) expect
; vpmaxsw + vpminsw + vpmovzxwd + vpmovdb; the SKX checks (AVX512VL/BW/DQ
; enabled) expect vpmaxsw against zero followed by vpmovuswb into xmm0.
804 define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
805 ; KNL-LABEL: smax_usat_trunc_wb_256:
807 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
808 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
809 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
810 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
811 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
812 ; KNL-NEXT: vzeroupper
815 ; SKX-LABEL: smax_usat_trunc_wb_256:
817 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
818 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
819 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
820 ; SKX-NEXT: vzeroupper
822 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
823 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
824 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
825 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
826 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Signed-max with 0 followed by signed-min with 255 on <8 x i16>, then truncate
; to i8 and store with align 1. The KNL checks (AVX512F only) expect vpmaxsw +
; vpminsw + vpackuswb + a 64-bit vmovq store; the SKX checks (AVX512VL/BW/DQ
; enabled) expect vpmaxsw against zero followed by a vpmovuswb store.
830 define void @smax_usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
831 ; KNL-LABEL: smax_usat_trunc_wb_128_mem:
833 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
834 ; KNL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
835 ; KNL-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
836 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
837 ; KNL-NEXT: vmovq %xmm0, (%rdi)
840 ; SKX-LABEL: smax_usat_trunc_wb_128_mem:
842 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
843 ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
844 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
846 %x1 = icmp sgt <8 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
847 %x2 = select <8 x i1> %x1, <8 x i16> %i, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
848 %x3 = icmp slt <8 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
849 %x5 = select <8 x i1> %x3, <8 x i16> %x2, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
850 %x6 = trunc <8 x i16> %x5 to <8 x i8>
851 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Signed-max with 0 followed by signed-min with 255 on <16 x i32>, then
; truncate to i8 and store with align 1. Both llc configurations are expected
; to emit vpmaxsd against zero followed by a vpmovusdb store.
855 define void @smax_usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
856 ; ALL-LABEL: smax_usat_trunc_db_512_mem:
858 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
859 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
860 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
861 ; ALL-NEXT: vzeroupper
863 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
864 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
865 %x3 = icmp slt <16 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
866 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
867 %x6 = trunc <16 x i32> %x5 to <16 x i8>
868 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Signed-max with 0 followed by signed-min with 255 on <8 x i64>, then truncate
; to i8 and store with align 1. Both llc configurations are expected to emit
; vpmaxsq against zero followed by a vpmovusqb store.
872 define void @smax_usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
873 ; ALL-LABEL: smax_usat_trunc_qb_512_mem:
875 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
876 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
877 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
878 ; ALL-NEXT: vzeroupper
880 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
881 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
882 %x3 = icmp slt <8 x i64> %x2, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
883 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
884 %x6 = trunc <8 x i64> %x5 to <8 x i8>
885 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Signed-max with 0 followed by signed-min with 4294967295 on <8 x i64>, then
; truncate to i32 and store with align 1. Both llc configurations are expected
; to emit vpmaxsq against zero followed by a vpmovusqd store.
889 define void @smax_usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
890 ; ALL-LABEL: smax_usat_trunc_qd_512_mem:
892 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
893 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
894 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
895 ; ALL-NEXT: vzeroupper
897 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
898 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
899 %x3 = icmp slt <8 x i64> %x2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
900 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
901 %x6 = trunc <8 x i64> %x5 to <8 x i32>
902 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Signed-max with 0 followed by signed-min with 65535 on <8 x i64>, then
; truncate to i16 and store with align 1. Both llc configurations are expected
; to emit vpmaxsq against zero followed by a vpmovusqw store.
906 define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
907 ; ALL-LABEL: smax_usat_trunc_qw_512_mem:
909 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
910 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
911 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
912 ; ALL-NEXT: vzeroupper
914 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
915 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
916 %x3 = icmp slt <8 x i64> %x2, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
917 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
918 %x6 = trunc <8 x i64> %x5 to <8 x i16>
919 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Clamp each i32 lane of the 1024-bit input %i to [0, 255] using signed
; compare+select pairs and truncate to <32 x i8>, the declared return type.
; The shared ALL checks expect the work to be done as two 512-bit halves:
; vpmaxsd against a vpxor-zeroed register on zmm0 and zmm1, one
; unsigned-saturating truncate (vpmovusdb) per half, and a vinserti128 that
; joins the two 128-bit results into ymm0.
923 define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
924 ; ALL-LABEL: smax_usat_trunc_db_1024:
926 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
927 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
928 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
929 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
930 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
931 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
933 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
934 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
935 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
936 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
937 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Clamp each i32 lane of the 1024-bit input %i to [0, 255] using signed
; compare+select pairs, truncate to <32 x i8> and store to %p (align 1).
; The shared ALL checks expect two 512-bit halves: vpmaxsd against a
; vpxor-zeroed register, an unsigned-saturating truncate (vpmovusdb) of each
; half into an xmm register, and two unaligned 16-byte stores (vmovdqu) at
; offsets 0 and 16 from %rdi rather than a truncating store to memory.
941 define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
942 ; ALL-LABEL: smax_usat_trunc_db_1024_mem:
944 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
945 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
946 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
947 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
948 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
949 ; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
950 ; ALL-NEXT: vmovdqu %xmm0, (%rdi)
951 ; ALL-NEXT: vzeroupper
953 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
954 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
955 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
956 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
957 %x6 = trunc <32 x i32> %x5 to <32 x i8>
958 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Clamp each i32 lane of %i to [0, 65535] using signed compare+select pairs
; and truncate to <16 x i16>, the declared return type.
; The shared ALL checks expect a signed max against a vpxor-zeroed register
; followed by an unsigned-saturating truncate (vpmovusdw) from zmm0 to ymm0.
962 define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) {
963 ; ALL-LABEL: smax_usat_trunc_dw_512:
965 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
966 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
967 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
969 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
970 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
971 %x3 = icmp slt <16 x i32> %x2, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
972 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
973 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Same max/min/trunc/store shape as the smax_usat tests, but on <16 x i16>
; with an upper bound of i16 -1 under a signed slt compare, so the second
; select is a signed min with -1 rather than a clamp to 255.
; The checks keep explicit vpmaxsw/vpminsw (the all-ones operand is built
; with vpcmpeqd) and use a plain, non-saturating truncate: vpmovwb to memory
; on SKX, vpmovzxwd followed by vpmovdb to memory on KNL.
; NOTE(review): per the function name this is the negative case, i.e. the
; pattern presumably must not become an unsigned-saturating truncate.
977 define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
978 ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
980 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
981 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
982 ; KNL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
983 ; KNL-NEXT: vpminsw %ymm1, %ymm0, %ymm0
984 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
985 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
986 ; KNL-NEXT: vzeroupper
989 ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
991 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
992 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
993 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
994 ; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0
995 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
996 ; SKX-NEXT: vzeroupper
998 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
999 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
1000 %x3 = icmp slt <16 x i16> %x2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1001 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1002 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1003 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Same max/min/trunc/store shape as the smax_usat tests, but the bounds are
; the negative constants -10 (lower, signed sgt) and -5 (upper, signed slt),
; so the clamped range is not of the form [0, unsigned max].
; The checks keep vpmaxsw/vpminsw with RIP-relative constant operands and
; use a plain, non-saturating truncate: vpmovwb to memory on SKX, vpmovzxwd
; followed by vpmovdb to memory on KNL.
; NOTE(review): per the function name this is the negative case, i.e. the
; pattern presumably must not become an unsigned-saturating truncate.
1007 define void @negative_test2_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
1008 ; KNL-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1010 ; KNL-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1011 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1012 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1013 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
1014 ; KNL-NEXT: vzeroupper
1017 ; SKX-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1019 ; SKX-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1020 ; SKX-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1021 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1022 ; SKX-NEXT: vzeroupper
1024 %x1 = icmp sgt <16 x i16> %i, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1025 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1026 %x3 = icmp slt <16 x i16> %x2, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1027 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1028 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1029 store <16 x i8> %x6, <16 x i8>* %res, align 1