1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,SKX
; Attribute group #0: functions declared with `#0` in this file are marked nounwind.
5 attributes #0 = { nounwind }
; Truncation of <16 x i32> to <16 x i8>. The shared ALL checks expect a single
; vpmovdb from zmm0 into xmm0 followed by vzeroupper.
7 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
8 ; ALL-LABEL: trunc_16x32_to_16x8:
10 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
11 ; ALL-NEXT: vzeroupper
13 %x = trunc <16 x i32> %i to <16 x i8>
; Truncation of <8 x i64> to <8 x i16>. The shared ALL checks expect a single
; vpmovqw from zmm0 into xmm0 followed by vzeroupper.
17 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
18 ; ALL-LABEL: trunc_8x64_to_8x16:
20 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
21 ; ALL-NEXT: vzeroupper
23 %x = trunc <8 x i64> %i to <8 x i16>
; Truncation of <16 x i32> to <16 x i16>. The shared ALL checks expect a single
; vpmovdw from zmm0 into ymm0 (no vzeroupper is checked here).
27 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
28 ; ALL-LABEL: trunc_v16i32_to_v16i16:
30 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
32 %1 = trunc <16 x i32> %x to <16 x i16>
; Truncation of <8 x i64> to <8 x i8>. The shared ALL checks expect vpmovqb
; from zmm0 into xmm0 followed by vzeroupper.
36 define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
37 ; ALL-LABEL: trunc_qb_512:
39 ; ALL-NEXT: vpmovqb %zmm0, %xmm0
40 ; ALL-NEXT: vzeroupper
42 %x = trunc <8 x i64> %i to <8 x i8>
; Truncation of <8 x i64> to <8 x i8> followed by a store to %res. The shared
; ALL checks expect the truncate and store to be a single vpmovqb to (%rdi).
46 define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
47 ; ALL-LABEL: trunc_qb_512_mem:
49 ; ALL-NEXT: vpmovqb %zmm0, (%rdi)
50 ; ALL-NEXT: vzeroupper
52 %x = trunc <8 x i64> %i to <8 x i8>
53 store <8 x i8> %x, <8 x i8>* %res
; Truncation of <4 x i64> to <4 x i8>. The KNL checks expect the zmm form of
; vpmovqb (with ymm0 treated as part of zmm0); the SKX checks expect the ymm form.
57 define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
58 ; KNL-LABEL: trunc_qb_256:
60 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
61 ; KNL-NEXT: vpmovqb %zmm0, %xmm0
62 ; KNL-NEXT: vzeroupper
65 ; SKX-LABEL: trunc_qb_256:
67 ; SKX-NEXT: vpmovqb %ymm0, %xmm0
68 ; SKX-NEXT: vzeroupper
70 %x = trunc <4 x i64> %i to <4 x i8>
; Truncation of <4 x i64> to <4 x i8> followed by a store to %res. The KNL checks
; expect vpmovqb on zmm0 plus a separate vmovd store; the SKX checks expect a
; single vpmovqb from ymm0 to (%rdi).
74 define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
75 ; KNL-LABEL: trunc_qb_256_mem:
77 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
78 ; KNL-NEXT: vpmovqb %zmm0, %xmm0
79 ; KNL-NEXT: vmovd %xmm0, (%rdi)
80 ; KNL-NEXT: vzeroupper
83 ; SKX-LABEL: trunc_qb_256_mem:
85 ; SKX-NEXT: vpmovqb %ymm0, (%rdi)
86 ; SKX-NEXT: vzeroupper
88 %x = trunc <4 x i64> %i to <4 x i8>
89 store <4 x i8> %x, <4 x i8>* %res
; Truncation of <2 x i64> to <2 x i8>. The shared ALL checks expect a vpshufb
; that picks source bytes 0 and 8 rather than a vpmov instruction.
93 define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
94 ; ALL-LABEL: trunc_qb_128:
96 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
98 %x = trunc <2 x i64> %i to <2 x i8>
; Truncation of <2 x i64> to <2 x i8> followed by a store to %res. The KNL checks
; expect vpshufb plus a vpextrw store; the SKX checks expect a single vpmovqb
; from xmm0 to (%rdi).
102 define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
103 ; KNL-LABEL: trunc_qb_128_mem:
105 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
106 ; KNL-NEXT: vpextrw $0, %xmm0, (%rdi)
109 ; SKX-LABEL: trunc_qb_128_mem:
111 ; SKX-NEXT: vpmovqb %xmm0, (%rdi)
113 %x = trunc <2 x i64> %i to <2 x i8>
114 store <2 x i8> %x, <2 x i8>* %res
; Truncation of <8 x i64> to <8 x i16>. The shared ALL checks expect vpmovqw
; from zmm0 into xmm0 followed by vzeroupper.
118 define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
119 ; ALL-LABEL: trunc_qw_512:
121 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
122 ; ALL-NEXT: vzeroupper
124 %x = trunc <8 x i64> %i to <8 x i16>
; Truncation of <8 x i64> to <8 x i16> followed by a store to %res. The shared
; ALL checks expect the truncate and store to be a single vpmovqw to (%rdi).
128 define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
129 ; ALL-LABEL: trunc_qw_512_mem:
131 ; ALL-NEXT: vpmovqw %zmm0, (%rdi)
132 ; ALL-NEXT: vzeroupper
134 %x = trunc <8 x i64> %i to <8 x i16>
135 store <8 x i16> %x, <8 x i16>* %res
; Truncation of <4 x i64> to <4 x i16>. The KNL checks expect the zmm form of
; vpmovqw (with ymm0 treated as part of zmm0); the SKX checks expect the ymm form.
139 define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
140 ; KNL-LABEL: trunc_qw_256:
142 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
143 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
144 ; KNL-NEXT: vzeroupper
147 ; SKX-LABEL: trunc_qw_256:
149 ; SKX-NEXT: vpmovqw %ymm0, %xmm0
150 ; SKX-NEXT: vzeroupper
152 %x = trunc <4 x i64> %i to <4 x i16>
; Truncation of <4 x i64> to <4 x i16> followed by a store to %res. The KNL checks
; expect vpmovqw on zmm0 plus a separate vmovq store; the SKX checks expect a
; single vpmovqw from ymm0 to (%rdi).
156 define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
157 ; KNL-LABEL: trunc_qw_256_mem:
159 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
160 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
161 ; KNL-NEXT: vmovq %xmm0, (%rdi)
162 ; KNL-NEXT: vzeroupper
165 ; SKX-LABEL: trunc_qw_256_mem:
167 ; SKX-NEXT: vpmovqw %ymm0, (%rdi)
168 ; SKX-NEXT: vzeroupper
170 %x = trunc <4 x i64> %i to <4 x i16>
171 store <4 x i16> %x, <4 x i16>* %res
; Truncation of <2 x i64> to <2 x i16>. The KNL checks expect a vpshufd +
; vpshuflw pair; the SKX checks expect a single vpshufb.
175 define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
176 ; KNL-LABEL: trunc_qw_128:
178 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
179 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
182 ; SKX-LABEL: trunc_qw_128:
184 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
186 %x = trunc <2 x i64> %i to <2 x i16>
; Truncation of <2 x i64> to <2 x i16> followed by a store to %res. The KNL checks
; expect vpshufd + vpshuflw plus a vmovd store; the SKX checks expect a single
; vpmovqw from xmm0 to (%rdi).
190 define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
191 ; KNL-LABEL: trunc_qw_128_mem:
193 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
194 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
195 ; KNL-NEXT: vmovd %xmm0, (%rdi)
198 ; SKX-LABEL: trunc_qw_128_mem:
200 ; SKX-NEXT: vpmovqw %xmm0, (%rdi)
202 %x = trunc <2 x i64> %i to <2 x i16>
203 store <2 x i16> %x, <2 x i16>* %res
; Truncation of <8 x i64> to <8 x i32>. The shared ALL checks expect a single
; vpmovqd from zmm0 into ymm0.
207 define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
208 ; ALL-LABEL: trunc_qd_512:
210 ; ALL-NEXT: vpmovqd %zmm0, %ymm0
212 %x = trunc <8 x i64> %i to <8 x i32>
; Truncation of <8 x i64> to <8 x i32> followed by a store to %res. The shared
; ALL checks expect the truncate and store to be a single vpmovqd to (%rdi).
216 define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
217 ; ALL-LABEL: trunc_qd_512_mem:
219 ; ALL-NEXT: vpmovqd %zmm0, (%rdi)
220 ; ALL-NEXT: vzeroupper
222 %x = trunc <8 x i64> %i to <8 x i32>
223 store <8 x i32> %x, <8 x i32>* %res
; Truncation of <4 x i64> to <4 x i32>. The KNL checks expect the zmm-to-ymm
; form of vpmovqd with the result taken from xmm0; the SKX checks expect the
; ymm-to-xmm form.
227 define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
228 ; KNL-LABEL: trunc_qd_256:
230 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
231 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
232 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
233 ; KNL-NEXT: vzeroupper
236 ; SKX-LABEL: trunc_qd_256:
238 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
239 ; SKX-NEXT: vzeroupper
241 %x = trunc <4 x i64> %i to <4 x i32>
; Truncation of <4 x i64> to <4 x i32> followed by a store to %res. The KNL checks
; expect vpmovqd on zmm0 plus a vmovdqa store of xmm0; the SKX checks expect a
; single vpmovqd from ymm0 to (%rdi).
245 define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
246 ; KNL-LABEL: trunc_qd_256_mem:
248 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
249 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
250 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
251 ; KNL-NEXT: vzeroupper
254 ; SKX-LABEL: trunc_qd_256_mem:
256 ; SKX-NEXT: vpmovqd %ymm0, (%rdi)
257 ; SKX-NEXT: vzeroupper
259 %x = trunc <4 x i64> %i to <4 x i32>
260 store <4 x i32> %x, <4 x i32>* %res
; Truncation of <2 x i64> to <2 x i32>. The shared ALL checks expect a single
; vpermilps shuffle rather than a vpmov instruction.
264 define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
265 ; ALL-LABEL: trunc_qd_128:
267 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
269 %x = trunc <2 x i64> %i to <2 x i32>
; Truncation of <2 x i64> to <2 x i32> followed by a store to %res. The KNL checks
; expect vpermilps plus a vmovlps store; the SKX checks expect a single vpmovqd
; from xmm0 to (%rdi).
273 define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
274 ; KNL-LABEL: trunc_qd_128_mem:
276 ; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
277 ; KNL-NEXT: vmovlps %xmm0, (%rdi)
280 ; SKX-LABEL: trunc_qd_128_mem:
282 ; SKX-NEXT: vpmovqd %xmm0, (%rdi)
284 %x = trunc <2 x i64> %i to <2 x i32>
285 store <2 x i32> %x, <2 x i32>* %res
; Truncation of <16 x i32> to <16 x i8>. The shared ALL checks expect vpmovdb
; from zmm0 into xmm0 followed by vzeroupper.
289 define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
290 ; ALL-LABEL: trunc_db_512:
292 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
293 ; ALL-NEXT: vzeroupper
295 %x = trunc <16 x i32> %i to <16 x i8>
; Truncation of <16 x i32> to <16 x i8> followed by a store to %res. The shared
; ALL checks expect the truncate and store to be a single vpmovdb to (%rdi).
299 define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
300 ; ALL-LABEL: trunc_db_512_mem:
302 ; ALL-NEXT: vpmovdb %zmm0, (%rdi)
303 ; ALL-NEXT: vzeroupper
305 %x = trunc <16 x i32> %i to <16 x i8>
306 store <16 x i8> %x, <16 x i8>* %res
; Truncation of <8 x i32> to <8 x i8>. The KNL checks expect the zmm form of
; vpmovdb (with ymm0 treated as part of zmm0); the SKX checks expect the ymm form.
310 define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
311 ; KNL-LABEL: trunc_db_256:
313 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
314 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
315 ; KNL-NEXT: vzeroupper
318 ; SKX-LABEL: trunc_db_256:
320 ; SKX-NEXT: vpmovdb %ymm0, %xmm0
321 ; SKX-NEXT: vzeroupper
323 %x = trunc <8 x i32> %i to <8 x i8>
; Truncation of <8 x i32> to <8 x i8> followed by a store to %res. The KNL checks
; expect vpmovdb on zmm0 plus a separate vmovq store; the SKX checks expect a
; single vpmovdb from ymm0 to (%rdi).
327 define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
328 ; KNL-LABEL: trunc_db_256_mem:
330 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
331 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
332 ; KNL-NEXT: vmovq %xmm0, (%rdi)
333 ; KNL-NEXT: vzeroupper
336 ; SKX-LABEL: trunc_db_256_mem:
338 ; SKX-NEXT: vpmovdb %ymm0, (%rdi)
339 ; SKX-NEXT: vzeroupper
341 %x = trunc <8 x i32> %i to <8 x i8>
342 store <8 x i8> %x, <8 x i8>* %res
; Truncation of <4 x i32> to <4 x i8>. The shared ALL checks expect a vpshufb
; that picks source bytes 0, 4, 8 and 12.
346 define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
347 ; ALL-LABEL: trunc_db_128:
349 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
351 %x = trunc <4 x i32> %i to <4 x i8>
; Truncation of <4 x i32> to <4 x i8> followed by a store to %res. The KNL checks
; expect vpshufb plus a vmovd store; the SKX checks expect a single vpmovdb
; from xmm0 to (%rdi).
355 define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
356 ; KNL-LABEL: trunc_db_128_mem:
358 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
359 ; KNL-NEXT: vmovd %xmm0, (%rdi)
362 ; SKX-LABEL: trunc_db_128_mem:
364 ; SKX-NEXT: vpmovdb %xmm0, (%rdi)
366 %x = trunc <4 x i32> %i to <4 x i8>
367 store <4 x i8> %x, <4 x i8>* %res
; Truncation of <16 x i32> to <16 x i16>. The shared ALL checks expect a single
; vpmovdw from zmm0 into ymm0.
371 define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
372 ; ALL-LABEL: trunc_dw_512:
374 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
376 %x = trunc <16 x i32> %i to <16 x i16>
; Truncation of <16 x i32> to <16 x i16> followed by a store to %res. The shared
; ALL checks expect the truncate and store to be a single vpmovdw to (%rdi).
380 define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
381 ; ALL-LABEL: trunc_dw_512_mem:
383 ; ALL-NEXT: vpmovdw %zmm0, (%rdi)
384 ; ALL-NEXT: vzeroupper
386 %x = trunc <16 x i32> %i to <16 x i16>
387 store <16 x i16> %x, <16 x i16>* %res
; Truncation of <8 x i32> to <8 x i16>. The KNL checks expect the zmm-to-ymm
; form of vpmovdw with the result taken from xmm0; the SKX checks expect the
; ymm-to-xmm form.
391 define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
392 ; KNL-LABEL: trunc_dw_256:
394 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
395 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
396 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
397 ; KNL-NEXT: vzeroupper
400 ; SKX-LABEL: trunc_dw_256:
402 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
403 ; SKX-NEXT: vzeroupper
405 %x = trunc <8 x i32> %i to <8 x i16>
; Truncation of <8 x i32> to <8 x i16> followed by a store to %res. The KNL checks
; expect vpmovdw on zmm0 plus a vmovdqa store of xmm0; the SKX checks expect a
; single vpmovdw from ymm0 to (%rdi).
409 define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
410 ; KNL-LABEL: trunc_dw_256_mem:
412 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
413 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
414 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
415 ; KNL-NEXT: vzeroupper
418 ; SKX-LABEL: trunc_dw_256_mem:
420 ; SKX-NEXT: vpmovdw %ymm0, (%rdi)
421 ; SKX-NEXT: vzeroupper
423 %x = trunc <8 x i32> %i to <8 x i16>
424 store <8 x i16> %x, <8 x i16>* %res
; Truncation of <4 x i32> to <4 x i16> followed by a store to %res. The KNL checks
; expect vpshufb plus a vmovq store; the SKX checks expect a single vpmovdw
; from xmm0 to (%rdi).
428 define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
429 ; KNL-LABEL: trunc_dw_128_mem:
431 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
432 ; KNL-NEXT: vmovq %xmm0, (%rdi)
435 ; SKX-LABEL: trunc_dw_128_mem:
437 ; SKX-NEXT: vpmovdw %xmm0, (%rdi)
439 %x = trunc <4 x i32> %i to <4 x i16>
440 store <4 x i16> %x, <4 x i16>* %res
; Truncation of <32 x i16> to <32 x i8>. The KNL checks expect the input to be
; split into two ymm halves, each zero-extended to dwords (vpmovzxwd) and
; narrowed with vpmovdb, then rejoined with vinserti128; the SKX checks expect
; a single vpmovwb.
444 define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
445 ; KNL-LABEL: trunc_wb_512:
447 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
448 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
449 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
450 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
451 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
452 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
455 ; SKX-LABEL: trunc_wb_512:
457 ; SKX-NEXT: vpmovwb %zmm0, %ymm0
459 %x = trunc <32 x i16> %i to <32 x i8>
; Truncation of <32 x i16> to <32 x i8> followed by a store to %res. The KNL
; checks expect two vpmovzxwd + vpmovdb pairs storing to 16(%rdi) and (%rdi);
; the SKX checks expect a single vpmovwb from zmm0 to (%rdi).
463 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
464 ; KNL-LABEL: trunc_wb_512_mem:
466 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
467 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
468 ; KNL-NEXT: vpmovdb %zmm1, 16(%rdi)
469 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
470 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
471 ; KNL-NEXT: vzeroupper
474 ; SKX-LABEL: trunc_wb_512_mem:
476 ; SKX-NEXT: vpmovwb %zmm0, (%rdi)
477 ; SKX-NEXT: vzeroupper
479 %x = trunc <32 x i16> %i to <32 x i8>
480 store <32 x i8> %x, <32 x i8>* %res
; Truncation of <16 x i16> to <16 x i8>. The KNL checks expect a zero-extend to
; dwords (vpmovzxwd) followed by vpmovdb; the SKX checks expect a single vpmovwb.
484 define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
485 ; KNL-LABEL: trunc_wb_256:
487 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
488 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
489 ; KNL-NEXT: vzeroupper
492 ; SKX-LABEL: trunc_wb_256:
494 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
495 ; SKX-NEXT: vzeroupper
497 %x = trunc <16 x i16> %i to <16 x i8>
; Truncation of <16 x i16> to <16 x i8> followed by a store to %res. The KNL
; checks expect vpmovzxwd followed by a storing vpmovdb; the SKX checks expect
; a single vpmovwb from ymm0 to (%rdi).
501 define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
502 ; KNL-LABEL: trunc_wb_256_mem:
504 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
505 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
506 ; KNL-NEXT: vzeroupper
509 ; SKX-LABEL: trunc_wb_256_mem:
511 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
512 ; SKX-NEXT: vzeroupper
514 %x = trunc <16 x i16> %i to <16 x i8>
515 store <16 x i8> %x, <16 x i8>* %res
; Truncation of <16 x i16> to <16 x i8> that is stored to %res in a function
; whose return type is also <16 x i8>. Both KNL and SKX checks expect the
; truncate into xmm0 first and then a separate vmovdqa store, rather than a
; storing vpmov form.
519 define <16 x i8> @trunc_wb_256_mem_and_ret(<16 x i16> %i, <16 x i8>* %res) #0 {
520 ; KNL-LABEL: trunc_wb_256_mem_and_ret:
522 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
523 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
524 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
525 ; KNL-NEXT: vzeroupper
528 ; SKX-LABEL: trunc_wb_256_mem_and_ret:
530 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
531 ; SKX-NEXT: vmovdqa %xmm0, (%rdi)
532 ; SKX-NEXT: vzeroupper
534 %x = trunc <16 x i16> %i to <16 x i8>
535 store <16 x i8> %x, <16 x i8>* %res
; Truncation of <8 x i16> to <8 x i8>. The shared ALL checks expect a vpshufb
; that picks the even-numbered source bytes.
539 define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
540 ; ALL-LABEL: trunc_wb_128:
542 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
544 %x = trunc <8 x i16> %i to <8 x i8>
; Truncation of <8 x i16> to <8 x i8> followed by a store to %res. The KNL checks
; expect vpshufb plus a vmovq store; the SKX checks expect a single vpmovwb
; from xmm0 to (%rdi).
548 define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
549 ; KNL-LABEL: trunc_wb_128_mem:
551 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
552 ; KNL-NEXT: vmovq %xmm0, (%rdi)
555 ; SKX-LABEL: trunc_wb_128_mem:
557 ; SKX-NEXT: vpmovwb %xmm0, (%rdi)
559 %x = trunc <8 x i16> %i to <8 x i8>
560 store <8 x i8> %x, <8 x i8>* %res
; Unsigned-saturating truncation of <16 x i16> to <16 x i8>, stored to %res with
; align 1: the icmp ult/select pair clamps each lane to at most 255 before the
; trunc. The KNL checks expect vpminuw + vpmovzxwd + storing vpmovdb; the SKX
; checks expect a single vpmovuswb to (%rdi).
565 define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
566 ; KNL-LABEL: usat_trunc_wb_256_mem:
568 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
569 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
570 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
571 ; KNL-NEXT: vzeroupper
574 ; SKX-LABEL: usat_trunc_wb_256_mem:
576 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
577 ; SKX-NEXT: vzeroupper
579 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
580 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
581 %x6 = trunc <16 x i16> %x5 to <16 x i8>
582 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating truncation of <16 x i16> to <16 x i8>: the icmp ult/select
; pair clamps each lane to at most 255 before the trunc. The KNL checks expect
; vpminuw + vpmovzxwd + vpmovdb; the SKX checks expect a single vpmovuswb.
586 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
587 ; KNL-LABEL: usat_trunc_wb_256:
589 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
590 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
591 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
592 ; KNL-NEXT: vzeroupper
595 ; SKX-LABEL: usat_trunc_wb_256:
597 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
598 ; SKX-NEXT: vzeroupper
600 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
601 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
602 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Unsigned-saturating truncation of <8 x i16> to <8 x i8>, stored to %res with
; align 1: the icmp ult/select pair clamps each lane to at most 255 before the
; trunc. The KNL checks expect vpminuw + vpackuswb + vmovq; the SKX checks
; expect a single vpmovuswb to (%rdi).
606 define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
607 ; KNL-LABEL: usat_trunc_wb_128_mem:
609 ; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
610 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
611 ; KNL-NEXT: vmovq %xmm0, (%rdi)
614 ; SKX-LABEL: usat_trunc_wb_128_mem:
616 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
618 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
619 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
620 %x6 = trunc <8 x i16> %x5 to <8 x i8>
621 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating truncation of <16 x i32> to <16 x i8>, stored to %res with
; align 1: lanes are clamped to at most 255 before the trunc. The shared ALL
; checks expect a single vpmovusdb to (%rdi).
625 define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
626 ; ALL-LABEL: usat_trunc_db_512_mem:
628 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
629 ; ALL-NEXT: vzeroupper
631 %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
632 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
633 %x6 = trunc <16 x i32> %x5 to <16 x i8>
634 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating truncation of <8 x i64> to <8 x i8>, stored to %res with
; align 1: lanes are clamped to at most 255 before the trunc. The shared ALL
; checks expect a single vpmovusqb to (%rdi).
638 define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
639 ; ALL-LABEL: usat_trunc_qb_512_mem:
641 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
642 ; ALL-NEXT: vzeroupper
644 %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
645 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
646 %x6 = trunc <8 x i64> %x5 to <8 x i8>
647 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating truncation of <8 x i64> to <8 x i32>, stored to %res with
; align 1: lanes are clamped to at most 4294967295 before the trunc. The shared
; ALL checks expect a single vpmovusqd to (%rdi).
651 define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
652 ; ALL-LABEL: usat_trunc_qd_512_mem:
654 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
655 ; ALL-NEXT: vzeroupper
657 %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
658 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
659 %x6 = trunc <8 x i64> %x5 to <8 x i32>
660 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Unsigned-saturating truncation of <8 x i64> to <8 x i16>, stored to %res with
; align 1: lanes are clamped to at most 65535 before the trunc. The shared ALL
; checks expect a single vpmovusqw to (%rdi).
664 define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
665 ; ALL-LABEL: usat_trunc_qw_512_mem:
667 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
668 ; ALL-NEXT: vzeroupper
670 %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
671 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
672 %x6 = trunc <8 x i64> %x5 to <8 x i16>
673 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Unsigned-saturating truncation of <32 x i32> to <32 x i8> (input spans zmm0
; and zmm1 in the checks): lanes are clamped to at most 255 before the trunc.
; The shared ALL checks expect one vpmovusdb per zmm half, joined by vinserti128.
677 define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
678 ; ALL-LABEL: usat_trunc_db_1024:
680 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
681 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
682 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
684 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
685 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
686 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Unsigned-saturating truncation of <32 x i32> to <32 x i8>, stored to %p with
; align 1: lanes are clamped to at most 255 before the trunc. The shared ALL
; checks expect one vpmovusdb per zmm half into xmm registers, then two
; unaligned vmovdqu stores to 16(%rdi) and (%rdi).
690 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
691 ; ALL-LABEL: usat_trunc_db_1024_mem:
693 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
694 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
695 ; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
696 ; ALL-NEXT: vmovdqu %xmm0, (%rdi)
697 ; ALL-NEXT: vzeroupper
699 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
700 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
701 %x6 = trunc <32 x i32> %x5 to <32 x i8>
702 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Unsigned-saturating truncation of <16 x i32> to <16 x i16>: lanes are clamped
; to at most 65535 before the trunc. The shared ALL checks expect a single
; vpmovusdw from zmm0 into ymm0.
706 define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
707 ; ALL-LABEL: usat_trunc_dw_512:
709 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
711 %x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
712 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
713 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Unsigned-saturating truncation of <8 x i16> to <8 x i8>: lanes are clamped to
; at most 255 before the trunc. The shared ALL checks expect vpminuw followed by
; vpackuswb for both configurations (no vpmovuswb is checked here).
717 define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
718 ; ALL-LABEL: usat_trunc_wb_128:
720 ; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
721 ; ALL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
723 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
724 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
725 %x6 = trunc <8 x i16> %x5 to <8 x i8>
; Unsigned-saturating truncation of <16 x i64> to <16 x i16> (input spans zmm0
; and zmm1 in the checks): lanes are clamped to at most 65535 before the trunc.
; The shared ALL checks expect one vpmovusqw per zmm half, joined by vinserti128.
729 define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
730 ; ALL-LABEL: usat_trunc_qw_1024:
732 ; ALL-NEXT: vpmovusqw %zmm0, %xmm0
733 ; ALL-NEXT: vpmovusqw %zmm1, %xmm1
734 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
736 %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
737 %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
738 %x6 = trunc <16 x i64> %x5 to <16 x i16>
; Unsigned-saturating truncation of <8 x i32> to <8 x i8> (lanes clamped to at
; most 255), then widened to <16 x i8> by a shufflevector whose second operand
; is undef, so result lanes 8-15 are undefined. The KNL checks expect
; vpbroadcastd + vpminud + zmm vpmovdb; the SKX checks expect a broadcast-memory
; vpminud + ymm vpmovdb.
742 define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
743 ; KNL-LABEL: usat_trunc_db_256:
745 ; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
746 ; KNL-NEXT: vpminud %ymm1, %ymm0, %ymm0
747 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
748 ; KNL-NEXT: vzeroupper
751 ; SKX-LABEL: usat_trunc_db_256:
753 ; SKX-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0
754 ; SKX-NEXT: vpmovdb %ymm0, %xmm0
755 ; SKX-NEXT: vzeroupper
757 %tmp1 = icmp ult <8 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
758 %tmp2 = select <8 x i1> %tmp1, <8 x i32> %x, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
759 %tmp3 = trunc <8 x i32> %tmp2 to <8 x i8>
760 %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
766 ; Tests for the following unsigned saturation pattern:
768 ; %a = icmp sgt %x, C1
769 ; %b = select %a, %x, C1
770 ; %c = icmp slt %b, C2
771 ; %d = select %c, %b, C2
; Signed clamp of <16 x i16> lanes to [0, 255] (icmp sgt/select against 0, then
; icmp slt/select against 255), truncated to <16 x i8> and stored to %res with
; align 1. The KNL checks expect vextracti128 + vpackuswb + vmovdqu; the SKX
; checks expect vpmaxsw against a zeroed register followed by a storing vpmovuswb.
775 define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, <16 x i8>* %res) {
776 ; KNL-LABEL: smax_usat_trunc_wb_256_mem1:
778 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
779 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
780 ; KNL-NEXT: vmovdqu %xmm0, (%rdi)
781 ; KNL-NEXT: vzeroupper
784 ; SKX-LABEL: smax_usat_trunc_wb_256_mem1:
786 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
787 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
788 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
789 ; SKX-NEXT: vzeroupper
791 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
792 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
793 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
794 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
795 %x6 = trunc <16 x i16> %x5 to <16 x i8>
796 store <16 x i8> %x6, <16 x i8>* %res, align 1
800 ; Test for smax(smin(x, C2), C1).
; Same [0, 255] signed clamp of <16 x i16> lanes with the two steps swapped: the
; icmp slt/select against 255 comes first, then the icmp sgt/select against 0.
; The result is truncated to <16 x i8> and stored to %res with align 1. The
; KNL and SKX checks expect the same instruction sequences as the sgt-first form.
801 define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, <16 x i8>* %res) {
802 ; KNL-LABEL: smax_usat_trunc_wb_256_mem2:
804 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
805 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
806 ; KNL-NEXT: vmovdqu %xmm0, (%rdi)
807 ; KNL-NEXT: vzeroupper
810 ; SKX-LABEL: smax_usat_trunc_wb_256_mem2:
812 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
813 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
814 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
815 ; SKX-NEXT: vzeroupper
817 %x1 = icmp slt <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
818 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
819 %x3 = icmp sgt <16 x i16> %x2, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
820 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
821 %x6 = trunc <16 x i16> %x5 to <16 x i8>
822 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Signed clamp of <16 x i16> lanes to [0, 255] followed by a trunc to <16 x i8>,
; with no store. The KNL checks expect vextracti128 + vpackuswb; the SKX checks
; expect vpmaxsw against a zeroed register followed by vpmovuswb into xmm0.
826 define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
827 ; KNL-LABEL: smax_usat_trunc_wb_256:
829 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
830 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
831 ; KNL-NEXT: vzeroupper
834 ; SKX-LABEL: smax_usat_trunc_wb_256:
836 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
837 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
838 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
839 ; SKX-NEXT: vzeroupper
841 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
842 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
843 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
844 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
845 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Signed clamp of <8 x i16> lanes to [0, 255], truncated to <8 x i8> and stored
; to %res with align 1. The KNL checks expect vpackuswb + vmovq; the SKX checks
; expect vpmaxsw against a zeroed register followed by a storing vpmovuswb.
849 define void @smax_usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
850 ; KNL-LABEL: smax_usat_trunc_wb_128_mem:
852 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
853 ; KNL-NEXT: vmovq %xmm0, (%rdi)
856 ; SKX-LABEL: smax_usat_trunc_wb_128_mem:
858 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
859 ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
860 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
862 %x1 = icmp sgt <8 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
863 %x2 = select <8 x i1> %x1, <8 x i16> %i, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
864 %x3 = icmp slt <8 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
865 %x5 = select <8 x i1> %x3, <8 x i16> %x2, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
866 %x6 = trunc <8 x i16> %x5 to <8 x i8>
867 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Signed clamp of <16 x i32> lanes to [0, 255], truncated to <16 x i8> and stored
; to %res with align 1. The shared ALL checks expect vpmaxsd against a zeroed
; register followed by a storing vpmovusdb.
871 define void @smax_usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
872 ; ALL-LABEL: smax_usat_trunc_db_512_mem:
874 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
875 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
876 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
877 ; ALL-NEXT: vzeroupper
879 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
880 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
881 %x3 = icmp slt <16 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
882 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
883 %x6 = trunc <16 x i32> %x5 to <16 x i8>
884 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Signed clamp of <8 x i64> lanes to [0, 255], truncated to <8 x i8> and stored
; to %res with align 1. The shared ALL checks expect vpmaxsq against a zeroed
; register followed by a storing vpmovusqb.
888 define void @smax_usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
889 ; ALL-LABEL: smax_usat_trunc_qb_512_mem:
891 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
892 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
893 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
894 ; ALL-NEXT: vzeroupper
896 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
897 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
898 %x3 = icmp slt <8 x i64> %x2, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
899 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
900 %x6 = trunc <8 x i64> %x5 to <8 x i8>
901 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Signed clamp of <8 x i64> lanes to [0, 4294967295], truncated to <8 x i32> and
; stored to %res with align 1. The shared ALL checks expect vpmaxsq against a
; zeroed register followed by a storing vpmovusqd.
905 define void @smax_usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
906 ; ALL-LABEL: smax_usat_trunc_qd_512_mem:
908 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
909 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
910 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
911 ; ALL-NEXT: vzeroupper
913 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
914 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
915 %x3 = icmp slt <8 x i64> %x2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
916 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
917 %x6 = trunc <8 x i64> %x5 to <8 x i32>
918 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Signed clamp of <8 x i64> lanes to [0, 65535], truncated to <8 x i16> and
; stored to %res with align 1. The shared ALL checks expect vpmaxsq against a
; zeroed register followed by a storing vpmovusqw.
922 define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
923 ; ALL-LABEL: smax_usat_trunc_qw_512_mem:
925 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
926 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
927 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
928 ; ALL-NEXT: vzeroupper
930 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
931 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
932 %x3 = icmp slt <8 x i64> %x2, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
933 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
934 %x6 = trunc <8 x i64> %x5 to <8 x i16>
935 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Clamp-then-truncate, <32 x i32> -> <32 x i8>, on a 1024-bit input.
; Each i32 lane of %i is clamped to [0, 255] with two signed icmp/select
; pairs and truncated to i8.
; The checks below (shared by both llc configurations) expect the input to be
; handled as two 512-bit halves (zmm0 and zmm1): each half gets a vpmaxsd
; against a zeroed register and a vpmovusdb down to an xmm register, and the
; two 128-bit results are joined into ymm0 with vinserti128.
; NOTE(review): the ret instruction is not visible in this excerpt;
; presumably %x6 is the returned value -- confirm against the full file.
939 define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
940 ; ALL-LABEL: smax_usat_trunc_db_1024:
942 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
943 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
944 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
945 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
946 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
947 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; Lower clamp: per-lane signed max(%i, 0).
949 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
950 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Upper clamp: per-lane signed min(%x2, 255), i.e. 2^8 - 1.
951 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
952 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Narrow the clamped lanes to bytes.
953 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Clamp-then-truncate store, <32 x i32> -> <32 x i8>, on a 1024-bit input.
; Each i32 lane of %i is clamped to [0, 255] with two signed icmp/select
; pairs, truncated to i8 and stored to %p with align 1.
; The checks below (shared by both llc configurations) expect the input to be
; handled as two 512-bit halves: each half gets a vpmaxsd against a zeroed
; register and a vpmovusdb down to an xmm register, and the two 128-bit
; results are written with separate unaligned vmovdqu stores at offsets 16
; and 0 from %rdi.
957 define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
958 ; ALL-LABEL: smax_usat_trunc_db_1024_mem:
960 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
961 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
962 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
963 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
964 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
965 ; ALL-NEXT: vmovdqu %xmm1, 16(%rdi)
966 ; ALL-NEXT: vmovdqu %xmm0, (%rdi)
967 ; ALL-NEXT: vzeroupper
; Lower clamp: per-lane signed max(%i, 0).
969 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
970 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Upper clamp: per-lane signed min(%x2, 255), i.e. 2^8 - 1.
971 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
972 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; Narrow the clamped lanes to bytes and store them with byte alignment.
973 %x6 = trunc <32 x i32> %x5 to <32 x i8>
974 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Clamp-then-truncate, <16 x i32> -> <16 x i16>.
; Each i32 lane of %i is clamped to [0, 65535] with two signed icmp/select
; pairs and truncated to i16.
; The checks below (shared by both llc configurations) expect a vpmaxsd
; against a zeroed register followed by a single vpmovusdw from zmm0 to ymm0;
; no separate min instruction appears in the expected output.
; NOTE(review): the ret instruction is not visible in this excerpt;
; presumably %x6 is the returned value -- confirm against the full file.
978 define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) {
979 ; ALL-LABEL: smax_usat_trunc_dw_512:
981 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
982 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
983 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
; Lower clamp: per-lane signed max(%i, 0).
985 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
986 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Upper clamp: per-lane signed min(%x2, 65535), i.e. 2^16 - 1.
987 %x3 = icmp slt <16 x i32> %x2, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
988 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
; Narrow the clamped lanes to 16 bits.
989 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Negative test: same icmp/select/trunc/store shape as the smax_usat_trunc_*
; tests, but on <16 x i16> -> <16 x i8> with an upper bound of i16 -1 instead
; of 255. Under the signed slt used here, -1 is below the lower bound of 0,
; so the bounds do not describe the [0, 255] range of an unsigned i8.
; The checks below expect NO unsigned-saturating truncate: both clamps stay
; as explicit vpmaxsw / vpminsw (the all-ones operand is materialised with
; vpcmpeqd), followed by a plain truncating store.
;  - AVX512F-only configuration (KNL checks): words are zero-extended to
;    dwords with vpmovzxwd and stored with vpmovdb.
;  - Configuration with VL/BW/DQ enabled (SKX checks): stored directly with
;    vpmovwb.
993 define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
994 ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
996 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
997 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
998 ; KNL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
999 ; KNL-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1000 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1001 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
1002 ; KNL-NEXT: vzeroupper
1005 ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
1007 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1008 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
1009 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
1010 ; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1011 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1012 ; SKX-NEXT: vzeroupper
; Lower clamp: per-lane signed max(%i, 0).
1014 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
1015 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
; Upper "clamp": per-lane signed min(%x2, -1). This is the bound that makes
; the pattern a negative case.
1016 %x3 = icmp slt <16 x i16> %x2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1017 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; Narrow to bytes and store with byte alignment.
1018 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1019 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Negative test: same icmp/select/trunc/store shape as the smax_usat_trunc_*
; tests, but on <16 x i16> -> <16 x i8> with clamp bounds of -10 (lower) and
; -5 (upper) rather than 0 and 255.
; The checks below expect NO unsigned-saturating truncate: both clamps stay
; as explicit vpmaxsw / vpminsw with RIP-relative memory operands, followed
; by a plain truncating store.
;  - AVX512F-only configuration (KNL checks): words are zero-extended to
;    dwords with vpmovzxwd and stored with vpmovdb.
;  - Configuration with VL/BW/DQ enabled (SKX checks): stored directly with
;    vpmovwb.
1023 define void @negative_test2_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
1024 ; KNL-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1026 ; KNL-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1027 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1028 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1029 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
1030 ; KNL-NEXT: vzeroupper
1033 ; SKX-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1035 ; SKX-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1036 ; SKX-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1037 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1038 ; SKX-NEXT: vzeroupper
; Lower clamp: per-lane signed max(%i, -10).
1040 %x1 = icmp sgt <16 x i16> %i, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1041 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
; Upper clamp: per-lane signed min(%x2, -5).
1042 %x3 = icmp slt <16 x i16> %x2, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1043 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
; Narrow to bytes and store with byte alignment.
1044 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1045 store <16 x i8> %x6, <16 x i8>* %res, align 1