1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=ALL,KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,SKX
4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,SKX
; Attribute group #0 (nounwind only); the trunc_* definitions in this file reference it.
6 attributes #0 = { nounwind }
; Truncates <16 x i32> to <16 x i8>. Every run configuration shares one
; expectation: a single vpmovdb from zmm0 into xmm0, then vzeroupper.
8 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
9 ; ALL-LABEL: trunc_16x32_to_16x8:
11 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
12 ; ALL-NEXT: vzeroupper
14 %x = trunc <16 x i32> %i to <16 x i8>
; Truncates <8 x i64> to <8 x i16>. Every run configuration expects a single
; vpmovqw from zmm0 into xmm0, then vzeroupper.
18 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
19 ; ALL-LABEL: trunc_8x64_to_8x16:
21 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
22 ; ALL-NEXT: vzeroupper
24 %x = trunc <8 x i64> %i to <8 x i16>
; Truncates <16 x i32> to <16 x i16>. Every run configuration expects a single
; vpmovdw from zmm0 into ymm0.
28 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
29 ; ALL-LABEL: trunc_v16i32_to_v16i16:
31 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
33 %1 = trunc <16 x i32> %x to <16 x i16>
; Truncates <8 x i64> to <8 x i8> (qword to byte, 512-bit source). Every run
; configuration expects vpmovqb from zmm0 into xmm0, then vzeroupper.
37 define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
38 ; ALL-LABEL: trunc_qb_512:
40 ; ALL-NEXT: vpmovqb %zmm0, %xmm0
41 ; ALL-NEXT: vzeroupper
43 %x = trunc <8 x i64> %i to <8 x i8>
; Truncates <8 x i64> to <8 x i8> and stores the result through %res. Every run
; configuration expects the memory-destination form of vpmovqb, i.e. the trunc
; and the store are emitted as one instruction.
47 define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
48 ; ALL-LABEL: trunc_qb_512_mem:
50 ; ALL-NEXT: vpmovqb %zmm0, (%rdi)
51 ; ALL-NEXT: vzeroupper
53 %x = trunc <8 x i64> %i to <8 x i8>
54 store <8 x i8> %x, <8 x i8>* %res
; Truncates <4 x i64> to <4 x i8> (256-bit source).
; KNL prefix (the +avx512f-only run) expects the 512-bit vpmovqb on zmm0, with
; ymm0 re-annotated as zmm0 by the kill comment.
; SKX prefix (the runs that add avx512vl/bw/dq) expects the 256-bit vpmovqb.
58 define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
59 ; KNL-LABEL: trunc_qb_256:
61 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
62 ; KNL-NEXT: vpmovqb %zmm0, %xmm0
63 ; KNL-NEXT: vzeroupper
66 ; SKX-LABEL: trunc_qb_256:
68 ; SKX-NEXT: vpmovqb %ymm0, %xmm0
69 ; SKX-NEXT: vzeroupper
71 %x = trunc <4 x i64> %i to <4 x i8>
; Truncates <4 x i64> to <4 x i8> and stores the result through %res.
; KNL prefix expects a 512-bit register vpmovqb followed by a separate vmovd store.
; SKX prefix expects a single 256-bit vpmovqb with a memory destination.
75 define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
76 ; KNL-LABEL: trunc_qb_256_mem:
78 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
79 ; KNL-NEXT: vpmovqb %zmm0, %xmm0
80 ; KNL-NEXT: vmovd %xmm0, (%rdi)
81 ; KNL-NEXT: vzeroupper
84 ; SKX-LABEL: trunc_qb_256_mem:
86 ; SKX-NEXT: vpmovqb %ymm0, (%rdi)
87 ; SKX-NEXT: vzeroupper
89 %x = trunc <4 x i64> %i to <4 x i8>
90 store <4 x i8> %x, <4 x i8>* %res
; Truncates <2 x i64> to <2 x i8> (128-bit source).
; KNL prefix expects a vpshufb that moves source bytes 0 and 8 into the low two
; byte positions; SKX prefix expects the 128-bit vpmovqb.
94 define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
95 ; KNL-LABEL: trunc_qb_128:
97 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
100 ; SKX-LABEL: trunc_qb_128:
102 ; SKX-NEXT: vpmovqb %xmm0, %xmm0
104 %x = trunc <2 x i64> %i to <2 x i8>
; Truncates <2 x i64> to <2 x i8> and stores the result through %res.
; KNL prefix expects vpshufb (bytes 0 and 8) followed by a vpextrw $0 store of
; the low 16 bits; SKX prefix expects vpmovqb with a memory destination.
108 define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
109 ; KNL-LABEL: trunc_qb_128_mem:
111 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
112 ; KNL-NEXT: vpextrw $0, %xmm0, (%rdi)
115 ; SKX-LABEL: trunc_qb_128_mem:
117 ; SKX-NEXT: vpmovqb %xmm0, (%rdi)
119 %x = trunc <2 x i64> %i to <2 x i8>
120 store <2 x i8> %x, <2 x i8>* %res
; Truncates <8 x i64> to <8 x i16> (qword to word, 512-bit source). Every run
; configuration expects vpmovqw from zmm0 into xmm0, then vzeroupper.
124 define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
125 ; ALL-LABEL: trunc_qw_512:
127 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
128 ; ALL-NEXT: vzeroupper
130 %x = trunc <8 x i64> %i to <8 x i16>
; Truncates <8 x i64> to <8 x i16> and stores the result through %res. Every run
; configuration expects vpmovqw with a memory destination.
134 define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
135 ; ALL-LABEL: trunc_qw_512_mem:
137 ; ALL-NEXT: vpmovqw %zmm0, (%rdi)
138 ; ALL-NEXT: vzeroupper
140 %x = trunc <8 x i64> %i to <8 x i16>
141 store <8 x i16> %x, <8 x i16>* %res
; Truncates <4 x i64> to <4 x i16> (256-bit source).
; KNL prefix expects the 512-bit vpmovqw on zmm0 (ymm0 re-annotated as zmm0);
; SKX prefix expects the 256-bit vpmovqw.
145 define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
146 ; KNL-LABEL: trunc_qw_256:
148 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
149 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
150 ; KNL-NEXT: vzeroupper
153 ; SKX-LABEL: trunc_qw_256:
155 ; SKX-NEXT: vpmovqw %ymm0, %xmm0
156 ; SKX-NEXT: vzeroupper
158 %x = trunc <4 x i64> %i to <4 x i16>
; Truncates <4 x i64> to <4 x i16> and stores the result through %res.
; KNL prefix expects a 512-bit register vpmovqw followed by a separate vmovq store.
; SKX prefix expects a single 256-bit vpmovqw with a memory destination.
162 define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
163 ; KNL-LABEL: trunc_qw_256_mem:
165 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
166 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
167 ; KNL-NEXT: vmovq %xmm0, (%rdi)
168 ; KNL-NEXT: vzeroupper
171 ; SKX-LABEL: trunc_qw_256_mem:
173 ; SKX-NEXT: vpmovqw %ymm0, (%rdi)
174 ; SKX-NEXT: vzeroupper
176 %x = trunc <4 x i64> %i to <4 x i16>
177 store <4 x i16> %x, <4 x i16>* %res
; Truncates <2 x i64> to <2 x i16> (128-bit source).
; KNL prefix expects a vpshufd + vpshuflw pair. SKX prefix expects a single
; vpshufb here, not vpmovqw (both SKX runs enable fast-variable-perlane-shuffle).
181 define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
182 ; KNL-LABEL: trunc_qw_128:
184 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
185 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
188 ; SKX-LABEL: trunc_qw_128:
190 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
192 %x = trunc <2 x i64> %i to <2 x i16>
; Truncates <2 x i64> to <2 x i16> and stores the result through %res.
; KNL prefix expects vpshufd + vpshuflw followed by a vmovd store;
; SKX prefix expects vpmovqw with a memory destination.
196 define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
197 ; KNL-LABEL: trunc_qw_128_mem:
199 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
200 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
201 ; KNL-NEXT: vmovd %xmm0, (%rdi)
204 ; SKX-LABEL: trunc_qw_128_mem:
206 ; SKX-NEXT: vpmovqw %xmm0, (%rdi)
208 %x = trunc <2 x i64> %i to <2 x i16>
209 store <2 x i16> %x, <2 x i16>* %res
; Truncates <8 x i64> to <8 x i32> (qword to dword, 512-bit source). Every run
; configuration expects vpmovqd from zmm0 into ymm0.
213 define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
214 ; ALL-LABEL: trunc_qd_512:
216 ; ALL-NEXT: vpmovqd %zmm0, %ymm0
218 %x = trunc <8 x i64> %i to <8 x i32>
; Truncates <8 x i64> to <8 x i32> and stores the result through %res. Every run
; configuration expects vpmovqd with a memory destination, then vzeroupper.
222 define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
223 ; ALL-LABEL: trunc_qd_512_mem:
225 ; ALL-NEXT: vpmovqd %zmm0, (%rdi)
226 ; ALL-NEXT: vzeroupper
228 %x = trunc <8 x i64> %i to <8 x i32>
229 store <8 x i32> %x, <8 x i32>* %res
; Truncates <4 x i64> to <4 x i32> (256-bit source).
; KNL prefix expects the 512-bit vpmovqd (zmm0 to ymm0) bracketed by kill
; annotations that widen the input and narrow the result to xmm0.
; SKX prefix expects the 256-bit vpmovqd from ymm0 into xmm0.
233 define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
234 ; KNL-LABEL: trunc_qd_256:
236 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
237 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
238 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
239 ; KNL-NEXT: vzeroupper
242 ; SKX-LABEL: trunc_qd_256:
244 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
245 ; SKX-NEXT: vzeroupper
247 %x = trunc <4 x i64> %i to <4 x i32>
; Truncates <4 x i64> to <4 x i32> and stores the result through %res.
; KNL prefix expects a 512-bit register vpmovqd followed by a vmovdqa store of xmm0.
; SKX prefix expects a single 256-bit vpmovqd with a memory destination.
251 define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
252 ; KNL-LABEL: trunc_qd_256_mem:
254 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
255 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
256 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
257 ; KNL-NEXT: vzeroupper
260 ; SKX-LABEL: trunc_qd_256_mem:
262 ; SKX-NEXT: vpmovqd %ymm0, (%rdi)
263 ; SKX-NEXT: vzeroupper
265 %x = trunc <4 x i64> %i to <4 x i32>
266 store <4 x i32> %x, <4 x i32>* %res
; Truncates <2 x i64> to <2 x i32> (128-bit source). Every run configuration,
; including the AVX512VL ones, expects a vpermilps shuffle rather than vpmovqd.
270 define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
271 ; ALL-LABEL: trunc_qd_128:
273 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
275 %x = trunc <2 x i64> %i to <2 x i32>
; Truncates <2 x i64> to <2 x i32> and stores the result through %res.
; KNL prefix expects vpermilps followed by a vmovlps store;
; SKX prefix expects vpmovqd with a memory destination.
279 define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
280 ; KNL-LABEL: trunc_qd_128_mem:
282 ; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
283 ; KNL-NEXT: vmovlps %xmm0, (%rdi)
286 ; SKX-LABEL: trunc_qd_128_mem:
288 ; SKX-NEXT: vpmovqd %xmm0, (%rdi)
290 %x = trunc <2 x i64> %i to <2 x i32>
291 store <2 x i32> %x, <2 x i32>* %res
; Truncates <16 x i32> to <16 x i8> (dword to byte, 512-bit source). Every run
; configuration expects vpmovdb from zmm0 into xmm0, then vzeroupper.
295 define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
296 ; ALL-LABEL: trunc_db_512:
298 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
299 ; ALL-NEXT: vzeroupper
301 %x = trunc <16 x i32> %i to <16 x i8>
; Truncates <16 x i32> to <16 x i8> and stores the result through %res. Every run
; configuration expects vpmovdb with a memory destination, then vzeroupper.
305 define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
306 ; ALL-LABEL: trunc_db_512_mem:
308 ; ALL-NEXT: vpmovdb %zmm0, (%rdi)
309 ; ALL-NEXT: vzeroupper
311 %x = trunc <16 x i32> %i to <16 x i8>
312 store <16 x i8> %x, <16 x i8>* %res
; Truncates <8 x i32> to <8 x i8> (256-bit source).
; KNL prefix expects the 512-bit vpmovdb on zmm0 (ymm0 re-annotated as zmm0);
; SKX prefix expects the 256-bit vpmovdb.
316 define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
317 ; KNL-LABEL: trunc_db_256:
319 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
320 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
321 ; KNL-NEXT: vzeroupper
324 ; SKX-LABEL: trunc_db_256:
326 ; SKX-NEXT: vpmovdb %ymm0, %xmm0
327 ; SKX-NEXT: vzeroupper
329 %x = trunc <8 x i32> %i to <8 x i8>
; Truncates <8 x i32> to <8 x i8> and stores the result through %res.
; KNL prefix expects a 512-bit register vpmovdb followed by a separate vmovq store.
; SKX prefix expects a single 256-bit vpmovdb with a memory destination.
333 define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
334 ; KNL-LABEL: trunc_db_256_mem:
336 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
337 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
338 ; KNL-NEXT: vmovq %xmm0, (%rdi)
339 ; KNL-NEXT: vzeroupper
342 ; SKX-LABEL: trunc_db_256_mem:
344 ; SKX-NEXT: vpmovdb %ymm0, (%rdi)
345 ; SKX-NEXT: vzeroupper
347 %x = trunc <8 x i32> %i to <8 x i8>
348 store <8 x i8> %x, <8 x i8>* %res
; Truncates <4 x i32> to <4 x i8> (128-bit source).
; KNL prefix expects a vpshufb gathering bytes 0, 4, 8 and 12 into the low
; dword; SKX prefix expects the 128-bit vpmovdb.
352 define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
353 ; KNL-LABEL: trunc_db_128:
355 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
358 ; SKX-LABEL: trunc_db_128:
360 ; SKX-NEXT: vpmovdb %xmm0, %xmm0
362 %x = trunc <4 x i32> %i to <4 x i8>
; Truncates <4 x i32> to <4 x i8> and stores the result through %res.
; KNL prefix expects vpshufb (bytes 0, 4, 8, 12) followed by a vmovd store;
; SKX prefix expects vpmovdb with a memory destination.
366 define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
367 ; KNL-LABEL: trunc_db_128_mem:
369 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
370 ; KNL-NEXT: vmovd %xmm0, (%rdi)
373 ; SKX-LABEL: trunc_db_128_mem:
375 ; SKX-NEXT: vpmovdb %xmm0, (%rdi)
377 %x = trunc <4 x i32> %i to <4 x i8>
378 store <4 x i8> %x, <4 x i8>* %res
; Truncates <16 x i32> to <16 x i16> (dword to word, 512-bit source). Every run
; configuration expects vpmovdw from zmm0 into ymm0.
382 define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
383 ; ALL-LABEL: trunc_dw_512:
385 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
387 %x = trunc <16 x i32> %i to <16 x i16>
; Truncates <16 x i32> to <16 x i16> and stores the result through %res. Every
; run configuration expects vpmovdw with a memory destination, then vzeroupper.
391 define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
392 ; ALL-LABEL: trunc_dw_512_mem:
394 ; ALL-NEXT: vpmovdw %zmm0, (%rdi)
395 ; ALL-NEXT: vzeroupper
397 %x = trunc <16 x i32> %i to <16 x i16>
398 store <16 x i16> %x, <16 x i16>* %res
; Truncates <8 x i32> to <8 x i16> (256-bit source).
; KNL prefix expects the 512-bit vpmovdw (zmm0 to ymm0) bracketed by kill
; annotations that widen the input and narrow the result to xmm0.
; SKX prefix expects the 256-bit vpmovdw from ymm0 into xmm0.
402 define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
403 ; KNL-LABEL: trunc_dw_256:
405 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
406 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
407 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
408 ; KNL-NEXT: vzeroupper
411 ; SKX-LABEL: trunc_dw_256:
413 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
414 ; SKX-NEXT: vzeroupper
416 %x = trunc <8 x i32> %i to <8 x i16>
; Truncates <8 x i32> to <8 x i16> and stores the result through %res.
; KNL prefix expects a 512-bit register vpmovdw followed by a vmovdqa store of xmm0.
; SKX prefix expects a single 256-bit vpmovdw with a memory destination.
420 define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
421 ; KNL-LABEL: trunc_dw_256_mem:
423 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
424 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
425 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
426 ; KNL-NEXT: vzeroupper
429 ; SKX-LABEL: trunc_dw_256_mem:
431 ; SKX-NEXT: vpmovdw %ymm0, (%rdi)
432 ; SKX-NEXT: vzeroupper
434 %x = trunc <8 x i32> %i to <8 x i16>
435 store <8 x i16> %x, <8 x i16>* %res
; Truncates <4 x i32> to <4 x i16> and stores the result through %res.
; KNL prefix expects a vpshufb packing the low word of each dword, then a vmovq
; store; SKX prefix expects vpmovdw with a memory destination.
439 define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
440 ; KNL-LABEL: trunc_dw_128_mem:
442 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u]
443 ; KNL-NEXT: vmovq %xmm0, (%rdi)
446 ; SKX-LABEL: trunc_dw_128_mem:
448 ; SKX-NEXT: vpmovdw %xmm0, (%rdi)
450 %x = trunc <4 x i32> %i to <4 x i16>
451 store <4 x i16> %x, <4 x i16>* %res
; Truncates <32 x i16> to <32 x i8> (word to byte, 512-bit source).
; KNL prefix (run without +avx512bw) expects each 256-bit half to be
; zero-extended to dwords (vpmovzxwd), narrowed with vpmovdb, and the two xmm
; results joined with vinserti128.
; SKX prefix expects a single vpmovwb from zmm0 into ymm0.
455 define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
456 ; KNL-LABEL: trunc_wb_512:
458 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
459 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
460 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
461 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
462 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
463 ; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
466 ; SKX-LABEL: trunc_wb_512:
468 ; SKX-NEXT: vpmovwb %zmm0, %ymm0
470 %x = trunc <32 x i16> %i to <32 x i8>
; Truncates <32 x i16> to <32 x i8> and stores the result through %res.
; KNL prefix expects each 256-bit half to be zero-extended to dwords and written
; with a memory-destination vpmovdb (upper half at offset 16, lower at offset 0).
; SKX prefix expects a single vpmovwb with a memory destination.
474 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
475 ; KNL-LABEL: trunc_wb_512_mem:
477 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
478 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
479 ; KNL-NEXT: vpmovdb %zmm1, 16(%rdi)
480 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
481 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
482 ; KNL-NEXT: vzeroupper
485 ; SKX-LABEL: trunc_wb_512_mem:
487 ; SKX-NEXT: vpmovwb %zmm0, (%rdi)
488 ; SKX-NEXT: vzeroupper
490 %x = trunc <32 x i16> %i to <32 x i8>
491 store <32 x i8> %x, <32 x i8>* %res
; Truncates <16 x i16> to <16 x i8> (256-bit source).
; KNL prefix expects a zero-extension to dwords (vpmovzxwd) followed by vpmovdb;
; SKX prefix expects the 256-bit vpmovwb.
495 define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
496 ; KNL-LABEL: trunc_wb_256:
498 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
499 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
500 ; KNL-NEXT: vzeroupper
503 ; SKX-LABEL: trunc_wb_256:
505 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
506 ; SKX-NEXT: vzeroupper
508 %x = trunc <16 x i16> %i to <16 x i8>
; Truncates <16 x i16> to <16 x i8> and stores the result through %res.
; KNL prefix expects vpmovzxwd followed by a memory-destination vpmovdb;
; SKX prefix expects a single vpmovwb with a memory destination.
512 define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
513 ; KNL-LABEL: trunc_wb_256_mem:
515 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
516 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
517 ; KNL-NEXT: vzeroupper
520 ; SKX-LABEL: trunc_wb_256_mem:
522 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
523 ; SKX-NEXT: vzeroupper
525 %x = trunc <16 x i16> %i to <16 x i8>
526 store <16 x i8> %x, <16 x i8>* %res
; Truncates <16 x i16> to <16 x i8> and stores the result through %res; the
; function also has a <16 x i8> return type (the ret is presumably of the same
; value, as the name suggests -- confirm against the full file).
; Both prefixes expect a register-destination truncate followed by a separate
; vmovdqa store, not the memory-destination truncate form.
530 define <16 x i8> @trunc_wb_256_mem_and_ret(<16 x i16> %i, <16 x i8>* %res) #0 {
531 ; KNL-LABEL: trunc_wb_256_mem_and_ret:
533 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
534 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
535 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
536 ; KNL-NEXT: vzeroupper
539 ; SKX-LABEL: trunc_wb_256_mem_and_ret:
541 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
542 ; SKX-NEXT: vmovdqa %xmm0, (%rdi)
543 ; SKX-NEXT: vzeroupper
545 %x = trunc <16 x i16> %i to <16 x i8>
546 store <16 x i8> %x, <16 x i8>* %res
; Truncates <8 x i16> to <8 x i8> (128-bit source).
; KNL prefix expects a vpshufb gathering the even-numbered bytes into the low
; qword; SKX prefix expects the 128-bit vpmovwb.
550 define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
551 ; KNL-LABEL: trunc_wb_128:
553 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
556 ; SKX-LABEL: trunc_wb_128:
558 ; SKX-NEXT: vpmovwb %xmm0, %xmm0
560 %x = trunc <8 x i16> %i to <8 x i8>
; Truncates <8 x i16> to <8 x i8> and stores the result through %res.
; KNL prefix expects vpshufb (even-numbered bytes) followed by a vmovq store;
; SKX prefix expects vpmovwb with a memory destination.
564 define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
565 ; KNL-LABEL: trunc_wb_128_mem:
567 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
568 ; KNL-NEXT: vmovq %xmm0, (%rdi)
571 ; SKX-LABEL: trunc_wb_128_mem:
573 ; SKX-NEXT: vpmovwb %xmm0, (%rdi)
575 %x = trunc <8 x i16> %i to <8 x i8>
576 store <8 x i8> %x, <8 x i8>* %res
; Unsigned-saturating truncate of <16 x i16> to <16 x i8>, stored through %res
; with align 1. The icmp ult + select pair clamps every lane to at most 255
; before the trunc.
; KNL prefix expects an explicit vpminuw against a constant-pool operand, then
; vpmovzxwd and a memory-destination vpmovdb.
; SKX prefix expects the whole pattern to become one vpmovuswb store.
581 define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
582 ; KNL-LABEL: usat_trunc_wb_256_mem:
584 ; KNL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
585 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
586 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
587 ; KNL-NEXT: vzeroupper
590 ; SKX-LABEL: usat_trunc_wb_256_mem:
592 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
593 ; SKX-NEXT: vzeroupper
595 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
596 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
597 %x6 = trunc <16 x i16> %x5 to <16 x i8>
598 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating truncate of <16 x i16> to <16 x i8>, register result. The
; icmp ult + select pair clamps every lane to at most 255 before the trunc.
; KNL prefix expects vpminuw against a constant-pool operand, then vpmovzxwd
; and vpmovdb; SKX prefix expects a single vpmovuswb.
602 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
603 ; KNL-LABEL: usat_trunc_wb_256:
605 ; KNL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
606 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
607 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
608 ; KNL-NEXT: vzeroupper
611 ; SKX-LABEL: usat_trunc_wb_256:
613 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
614 ; SKX-NEXT: vzeroupper
616 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
617 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
618 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Unsigned-saturating truncate of <8 x i16> to <8 x i8>, stored through %res
; with align 1. The icmp ult + select pair clamps every lane to at most 255.
; KNL prefix expects vpminuw, vpackuswb and a vmovq store;
; SKX prefix expects a single vpmovuswb store.
622 define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
623 ; KNL-LABEL: usat_trunc_wb_128_mem:
625 ; KNL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
626 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
627 ; KNL-NEXT: vmovq %xmm0, (%rdi)
630 ; SKX-LABEL: usat_trunc_wb_128_mem:
632 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
634 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
635 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
636 %x6 = trunc <8 x i16> %x5 to <8 x i8>
637 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating truncate of <16 x i32> to <16 x i8>, stored through %res
; with align 1 (lanes clamped to at most 255 by icmp ult + select). Every run
; configuration expects a single memory-destination vpmovusdb.
641 define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
642 ; ALL-LABEL: usat_trunc_db_512_mem:
644 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
645 ; ALL-NEXT: vzeroupper
647 %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
648 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
649 %x6 = trunc <16 x i32> %x5 to <16 x i8>
650 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating truncate of <8 x i64> to <8 x i8>, stored through %res
; with align 1 (lanes clamped to at most 255 by icmp ult + select). Every run
; configuration expects a single memory-destination vpmovusqb.
654 define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
655 ; ALL-LABEL: usat_trunc_qb_512_mem:
657 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
658 ; ALL-NEXT: vzeroupper
660 %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
661 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
662 %x6 = trunc <8 x i64> %x5 to <8 x i8>
663 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating truncate of <8 x i64> to <8 x i32>, stored through %res
; with align 1. Lanes are clamped to at most 4294967295 (the largest unsigned
; 32-bit value) by icmp ult + select. Every run configuration expects a single
; memory-destination vpmovusqd.
667 define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
668 ; ALL-LABEL: usat_trunc_qd_512_mem:
670 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
671 ; ALL-NEXT: vzeroupper
673 %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
674 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
675 %x6 = trunc <8 x i64> %x5 to <8 x i32>
676 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Unsigned-saturating truncate of <8 x i64> to <8 x i16>, stored through %res
; with align 1 (lanes clamped to at most 65535 by icmp ult + select). Every run
; configuration expects a single memory-destination vpmovusqw.
680 define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
681 ; ALL-LABEL: usat_trunc_qw_512_mem:
683 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
684 ; ALL-NEXT: vzeroupper
686 %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
687 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
688 %x6 = trunc <8 x i64> %x5 to <8 x i16>
689 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Unsigned-saturating truncate of a 1024-bit <32 x i32> to <32 x i8> (lanes
; clamped to at most 255). Every run configuration expects the input to be
; handled as two zmm halves: one vpmovusdb per half, joined with vinserti128.
693 define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
694 ; ALL-LABEL: usat_trunc_db_1024:
696 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
697 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
698 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
700 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
701 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
702 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Unsigned-saturating truncate of a 1024-bit <32 x i32> to <32 x i8>, stored
; through %p with align 1. Every run configuration expects two
; memory-destination vpmovusdb instructions, one per zmm half (offsets 16 and 0).
706 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
707 ; ALL-LABEL: usat_trunc_db_1024_mem:
709 ; ALL-NEXT: vpmovusdb %zmm1, 16(%rdi)
710 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
711 ; ALL-NEXT: vzeroupper
713 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
714 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
715 %x6 = trunc <32 x i32> %x5 to <32 x i8>
716 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Unsigned-saturating truncate of <16 x i32> to <16 x i16> (lanes clamped to at
; most 65535 by icmp ult + select). Every run configuration expects a single
; vpmovusdw from zmm0 into ymm0.
720 define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
721 ; ALL-LABEL: usat_trunc_dw_512:
723 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
725 %x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
726 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
727 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Unsigned-saturating truncate of <8 x i16> to <8 x i8>, register result (lanes
; clamped to at most 255 by icmp ult + select).
; KNL prefix expects vpminuw against a constant-pool operand, then vpackuswb;
; SKX prefix expects a single vpmovuswb.
731 define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
732 ; KNL-LABEL: usat_trunc_wb_128:
734 ; KNL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
735 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
738 ; SKX-LABEL: usat_trunc_wb_128:
740 ; SKX-NEXT: vpmovuswb %xmm0, %xmm0
742 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
743 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
744 %x6 = trunc <8 x i16> %x5 to <8 x i8>
; Unsigned-saturating truncate of a 1024-bit <16 x i64> to <16 x i16> (lanes
; clamped to at most 65535). Every run configuration expects one vpmovusqw per
; zmm half, with the two xmm results joined by vinserti128.
748 define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
749 ; ALL-LABEL: usat_trunc_qw_1024:
751 ; ALL-NEXT: vpmovusqw %zmm0, %xmm0
752 ; ALL-NEXT: vpmovusqw %zmm1, %xmm1
753 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
755 %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
756 %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
757 %x6 = trunc <16 x i64> %x5 to <16 x i16>
; Unsigned-saturating truncate of <8 x i32> to <8 x i8> (lanes clamped to at
; most 255), then widened to <16 x i8> by a shufflevector whose indices 8..15
; select from the undef second operand, so the upper eight bytes are undefined.
; KNL prefix expects the 512-bit vpmovusdb on zmm0 (ymm0 re-annotated as zmm0);
; SKX prefix expects the 256-bit vpmovusdb.
761 define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
762 ; KNL-LABEL: usat_trunc_db_256:
764 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
765 ; KNL-NEXT: vpmovusdb %zmm0, %xmm0
766 ; KNL-NEXT: vzeroupper
769 ; SKX-LABEL: usat_trunc_db_256:
771 ; SKX-NEXT: vpmovusdb %ymm0, %xmm0
772 ; SKX-NEXT: vzeroupper
774 %tmp1 = icmp ult <8 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
775 %tmp2 = select <8 x i1> %tmp1, <8 x i32> %x, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
776 %tmp3 = trunc <8 x i32> %tmp2 to <8 x i8>
777 %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
783 ; Tests for the following unsigned saturation pattern:
785 ; %a = icmp sgt %x, C1
786 ; %b = select %a, %x, C1
787 ; %c = icmp slt %b, C2
788 ; %d = select %c, %b, C2
; Clamp-then-truncate of <16 x i16> to <16 x i8>, stored through %res with
; align 1. Lanes are first raised to at least 0 (icmp sgt + select) and then
; limited to at most 255 (icmp slt + select), i.e. the max-then-min order.
; KNL prefix expects vextracti128 + vpackuswb and a vmovdqu store.
; SKX prefix expects vpmaxsw against a zeroed register, then a vpmovuswb store.
792 define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, <16 x i8>* %res) {
793 ; KNL-LABEL: smax_usat_trunc_wb_256_mem1:
795 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
796 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
797 ; KNL-NEXT: vmovdqu %xmm0, (%rdi)
798 ; KNL-NEXT: vzeroupper
801 ; SKX-LABEL: smax_usat_trunc_wb_256_mem1:
803 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
804 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
805 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
806 ; SKX-NEXT: vzeroupper
808 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
809 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
810 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
811 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
812 %x6 = trunc <16 x i16> %x5 to <16 x i8>
813 store <16 x i8> %x6, <16 x i8>* %res, align 1
817 ; Test for smax(smin(x, C2), C1).
; Clamp order reversed: lanes are limited to at most 255 first (icmp slt +
; select) and only then raised to at least 0 (icmp sgt + select) before the
; trunc to <16 x i8> and the align-1 store through %res.
; KNL prefix expects vextracti128 + vpackuswb and a vmovdqu store.
; SKX prefix expects vpmaxsw against a zeroed register, then a vpmovuswb store.
818 define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, <16 x i8>* %res) {
819 ; KNL-LABEL: smax_usat_trunc_wb_256_mem2:
821 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
822 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
823 ; KNL-NEXT: vmovdqu %xmm0, (%rdi)
824 ; KNL-NEXT: vzeroupper
827 ; SKX-LABEL: smax_usat_trunc_wb_256_mem2:
829 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
830 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
831 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
832 ; SKX-NEXT: vzeroupper
834 %x1 = icmp slt <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
835 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
836 %x3 = icmp sgt <16 x i16> %x2, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
837 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
838 %x6 = trunc <16 x i16> %x5 to <16 x i8>
839 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Clamp-then-truncate of <16 x i16> to <16 x i8>, register result. Lanes are
; raised to at least 0 and then limited to at most 255 before the trunc.
; KNL prefix expects vextracti128 + vpackuswb.
; SKX prefix expects vpmaxsw against a zeroed register, then vpmovuswb.
843 define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
844 ; KNL-LABEL: smax_usat_trunc_wb_256:
846 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
847 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
848 ; KNL-NEXT: vzeroupper
851 ; SKX-LABEL: smax_usat_trunc_wb_256:
853 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
854 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
855 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
856 ; SKX-NEXT: vzeroupper
858 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
859 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
860 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
861 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
862 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Clamp-then-truncate of <8 x i16> to <8 x i8>, stored through %res with
; align 1. Lanes are raised to at least 0 and then limited to at most 255.
; KNL prefix expects a single vpackuswb followed by a vmovq store.
; SKX prefix expects vpmaxsw against a zeroed register, then a vpmovuswb store.
866 define void @smax_usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
867 ; KNL-LABEL: smax_usat_trunc_wb_128_mem:
869 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
870 ; KNL-NEXT: vmovq %xmm0, (%rdi)
873 ; SKX-LABEL: smax_usat_trunc_wb_128_mem:
875 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
876 ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
877 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
879 %x1 = icmp sgt <8 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
880 %x2 = select <8 x i1> %x1, <8 x i16> %i, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
881 %x3 = icmp slt <8 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
882 %x5 = select <8 x i1> %x3, <8 x i16> %x2, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
883 %x6 = trunc <8 x i16> %x5 to <8 x i8>
884 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Clamp-then-truncate of <16 x i32> to <16 x i8>, stored through %res with
; align 1. Lanes are raised to at least 0 and then limited to at most 255.
; Every run configuration expects vpmaxsd against a zeroed register, then a
; memory-destination vpmovusdb.
888 define void @smax_usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
889 ; ALL-LABEL: smax_usat_trunc_db_512_mem:
891 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
892 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
893 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
894 ; ALL-NEXT: vzeroupper
896 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
897 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
898 %x3 = icmp slt <16 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
899 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
900 %x6 = trunc <16 x i32> %x5 to <16 x i8>
901 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Clamp-then-truncate of <8 x i64> to <8 x i8>, stored through %res with
; align 1. Lanes are raised to at least 0 and then limited to at most 255.
; Every run configuration expects vpmaxsq against a zeroed register, then a
; memory-destination vpmovusqb.
905 define void @smax_usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
906 ; ALL-LABEL: smax_usat_trunc_qb_512_mem:
908 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
909 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
910 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
911 ; ALL-NEXT: vzeroupper
913 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
914 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
915 %x3 = icmp slt <8 x i64> %x2, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
916 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
917 %x6 = trunc <8 x i64> %x5 to <8 x i8>
918 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Clamp-then-truncate of <8 x i64> to <8 x i32>, stored through %res with
; align 1. Lanes are raised to at least 0 and then limited to at most
; 4294967295 (the largest unsigned 32-bit value).
; Every run configuration expects vpmaxsq against a zeroed register, then a
; memory-destination vpmovusqd.
922 define void @smax_usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
923 ; ALL-LABEL: smax_usat_trunc_qd_512_mem:
925 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
926 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
927 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
928 ; ALL-NEXT: vzeroupper
930 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
931 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
932 %x3 = icmp slt <8 x i64> %x2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
933 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
934 %x6 = trunc <8 x i64> %x5 to <8 x i32>
935 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Clamp-then-truncate of <8 x i64> to <8 x i16>, stored through %res with
; align 1. Lanes are raised to at least 0 and then limited to at most 65535.
; Every run configuration expects vpmaxsq against a zeroed register, then a
; memory-destination vpmovusqw.
939 define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
940 ; ALL-LABEL: smax_usat_trunc_qw_512_mem:
942 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
943 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
944 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
945 ; ALL-NEXT: vzeroupper
947 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
948 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
949 %x3 = icmp slt <8 x i64> %x2, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
950 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
951 %x6 = trunc <8 x i64> %x5 to <8 x i16>
952 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Unsigned-saturating truncate of a 1024-bit vector: <32 x i32> -> <32 x i8>,
; produced as a value (the function's return type is <32 x i8>).
; The IR clamps every lane to [0, 255] (signed max with 0, then signed min with
; 255) before the trunc.
; The input arrives in two 512-bit registers (zmm0 and zmm1), so the checks
; expect each half to get its own vpmaxsd + vpmovusdb into an xmm register,
; with vinserti128 combining the two 128-bit results into ymm0.
956 define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
957 ; ALL-LABEL: smax_usat_trunc_db_1024:
959 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
960 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
961 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
962 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
963 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
964 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
966 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
967 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
968 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
969 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
970 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Store variant of the 1024-bit unsigned-saturating truncate:
; <32 x i32> -> <32 x i8>, written to %p with align 1.
; The IR clamps every lane to [0, 255] (signed max with 0, then signed min with
; 255) before the trunc.
; The checks expect one vpmaxsd per 512-bit half and two vpmovusdb stores, the
; zmm0 half going to (%rdi) and the zmm1 half to 16(%rdi), with no separate
; register-to-register truncate or vector insert.
974 define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
975 ; ALL-LABEL: smax_usat_trunc_db_1024_mem:
977 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
978 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
979 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
980 ; ALL-NEXT: vpmovusdb %zmm1, 16(%rdi)
981 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
982 ; ALL-NEXT: vzeroupper
984 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
985 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
986 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
987 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
988 %x6 = trunc <32 x i32> %x5 to <32 x i8>
989 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Unsigned-saturating truncate <16 x i32> -> <16 x i16>, produced as a value
; (the function's return type is <16 x i16>).
; The IR clamps every lane to [0, 65535] (signed max with 0, then signed min
; with 65535) before the trunc.
; The checks expect vpmaxsd against a zeroed register for the lower bound and a
; single vpmovusdw from zmm0 into ymm0 for the upper bound plus trunc. No
; vzeroupper is checked here, unlike the store variants in this file.
993 define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) {
994 ; ALL-LABEL: smax_usat_trunc_dw_512:
996 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
997 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
998 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
1000 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1001 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1002 %x3 = icmp slt <16 x i32> %x2, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1003 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
1004 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Negative test: a min/max clamp whose bounds are NOT the unsigned range of the
; destination type, so no saturating-truncate instruction is expected.
; The IR takes a signed min with -5 first (icmp slt + select), then a signed max
; with -10 (icmp sgt + select), i.e. it clamps each i16 lane to [-10, -5],
; truncates to i8 and stores to %res.
; The checks expect the clamp to remain as vpminsw + vpmaxsw with constant-pool
; operands, followed by a plain (non-saturating) truncating store:
;  - KNL run (avx512f only): zero-extend words to dwords with vpmovzxwd, then
;    vpmovdb to memory.
;  - SKX runs (avx512bw/avx512vl enabled): vpmovwb straight from ymm0 to memory.
1008 define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
1009 ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
1011 ; KNL-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1012 ; KNL-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1013 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1014 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
1015 ; KNL-NEXT: vzeroupper
1018 ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
1020 ; SKX-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1021 ; SKX-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1022 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1023 ; SKX-NEXT: vzeroupper
1025 %x1 = icmp slt <16 x i16> %i, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1026 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1027 %x3 = icmp sgt <16 x i16> %x2, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1028 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1029 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1030 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Negative test: same [-10, -5] clamp as negative_test1, with the operations in
; the opposite order. Here the signed max with -10 comes first (icmp sgt +
; select), then the signed min with -5 (icmp slt + select), followed by a trunc
; to i8 and a store to %res.
; Because the bounds are not the unsigned range of i8, the checks expect no
; saturating truncate: vpmaxsw + vpminsw with constant-pool operands, then a
; plain truncating store:
;  - KNL run (avx512f only): vpmovzxwd to widen to dwords, then vpmovdb to memory.
;  - SKX runs (avx512bw/avx512vl enabled): vpmovwb straight from ymm0 to memory.
1034 define void @negative_test2_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
1035 ; KNL-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1037 ; KNL-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1038 ; KNL-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1039 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1040 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
1041 ; KNL-NEXT: vzeroupper
1044 ; SKX-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1046 ; SKX-NEXT: vpmaxsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1047 ; SKX-NEXT: vpminsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1048 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1049 ; SKX-NEXT: vzeroupper
1051 %x1 = icmp sgt <16 x i16> %i, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1052 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1053 %x3 = icmp slt <16 x i16> %x2, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1054 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1055 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1056 store <16 x i8> %x6, <16 x i8>* %res, align 1
1060 define void @ssat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
1061 ; ALL-LABEL: ssat_trunc_db_1024_mem:
1063 ; ALL-NEXT: vpmovsdb %zmm1, 16(%rdi)
1064 ; ALL-NEXT: vpmovsdb %zmm0, (%rdi)
1065 ; ALL-NEXT: vzeroupper
1067 %x1 = icmp sgt <32 x i32> %i, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32
1069 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
1070 %x3 = icmp slt <32 x i32> %x2, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
1071 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
1072 %x6 = trunc <32 x i32> %x5 to <32 x i8>
1073 store <32 x i8>%x6, <32 x i8>* %p, align 1