1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,SKX
; Attribute group #0 (nounwind), referenced by the test functions that carry `#0`.
5 attributes #0 = { nounwind }
; Register form of the <16 x i32> to <16 x i8> truncation.
; Both run configurations expect one vpmovdb (zmm0 -> xmm0) followed by vzeroupper.
7 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
8 ; ALL-LABEL: trunc_16x32_to_16x8:
10 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
11 ; ALL-NEXT: vzeroupper
13 %x = trunc <16 x i32> %i to <16 x i8>
; Register form of the <8 x i64> to <8 x i16> truncation.
; Both run configurations expect one vpmovqw (zmm0 -> xmm0) followed by vzeroupper.
17 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
18 ; ALL-LABEL: trunc_8x64_to_8x16:
20 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
21 ; ALL-NEXT: vzeroupper
23 %x = trunc <8 x i64> %i to <8 x i16>
; Register form of the <16 x i32> to <16 x i16> truncation.
; Both run configurations expect one vpmovdw (zmm0 -> ymm0).
27 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
28 ; ALL-LABEL: trunc_v16i32_to_v16i16:
30 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
32 %1 = trunc <16 x i32> %x to <16 x i16>
; qword -> byte truncation of a 512-bit vector (<8 x i64> to <8 x i8>), register form.
; Both run configurations expect vpmovqb (zmm0 -> xmm0) followed by vzeroupper.
36 define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
37 ; ALL-LABEL: trunc_qb_512:
39 ; ALL-NEXT: vpmovqb %zmm0, %xmm0
40 ; ALL-NEXT: vzeroupper
42 %x = trunc <8 x i64> %i to <8 x i8>
; <8 x i64> to <8 x i8> truncation whose result is stored through %res.
; Both run configurations expect the truncating store form: vpmovqb zmm0 -> (%rdi).
46 define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
47 ; ALL-LABEL: trunc_qb_512_mem:
49 ; ALL-NEXT: vpmovqb %zmm0, (%rdi)
50 ; ALL-NEXT: vzeroupper
52 %x = trunc <8 x i64> %i to <8 x i8>
53 store <8 x i8> %x, <8 x i8>* %res
; <4 x i64> to <4 x i8> truncation, register form.
; The KNL run expects the 512-bit vpmovqb on zmm0; the SKX run expects the
; 256-bit vpmovqb on ymm0.
57 define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
58 ; KNL-LABEL: trunc_qb_256:
60 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
61 ; KNL-NEXT: vpmovqb %zmm0, %xmm0
62 ; KNL-NEXT: vzeroupper
65 ; SKX-LABEL: trunc_qb_256:
67 ; SKX-NEXT: vpmovqb %ymm0, %xmm0
68 ; SKX-NEXT: vzeroupper
70 %x = trunc <4 x i64> %i to <4 x i8>
; <4 x i64> to <4 x i8> truncation stored through %res.
; The KNL run expects vpmovqb into xmm0 and a separate vmovd store; the SKX run
; expects a single vpmovqb from ymm0 straight to memory.
74 define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
75 ; KNL-LABEL: trunc_qb_256_mem:
77 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
78 ; KNL-NEXT: vpmovqb %zmm0, %xmm0
79 ; KNL-NEXT: vmovd %xmm0, (%rdi)
80 ; KNL-NEXT: vzeroupper
83 ; SKX-LABEL: trunc_qb_256_mem:
85 ; SKX-NEXT: vpmovqb %ymm0, (%rdi)
86 ; SKX-NEXT: vzeroupper
88 %x = trunc <4 x i64> %i to <4 x i8>
89 store <4 x i8> %x, <4 x i8>* %res
; <2 x i64> to <2 x i8> truncation, register form.
; Both run configurations expect a vpshufb that gathers source bytes 0 and 8.
93 define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
94 ; ALL-LABEL: trunc_qb_128:
96 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
98 %x = trunc <2 x i64> %i to <2 x i8>
; <2 x i64> to <2 x i8> truncation stored through %res.
; The KNL run expects vpshufb followed by a vpextrw store; the SKX run expects
; a single vpmovqb from xmm0 to memory.
102 define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
103 ; KNL-LABEL: trunc_qb_128_mem:
105 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
106 ; KNL-NEXT: vpextrw $0, %xmm0, (%rdi)
109 ; SKX-LABEL: trunc_qb_128_mem:
111 ; SKX-NEXT: vpmovqb %xmm0, (%rdi)
113 %x = trunc <2 x i64> %i to <2 x i8>
114 store <2 x i8> %x, <2 x i8>* %res
; qword -> word truncation of a 512-bit vector (<8 x i64> to <8 x i16>), register form.
; Both run configurations expect vpmovqw (zmm0 -> xmm0) followed by vzeroupper.
118 define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
119 ; ALL-LABEL: trunc_qw_512:
121 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
122 ; ALL-NEXT: vzeroupper
124 %x = trunc <8 x i64> %i to <8 x i16>
; <8 x i64> to <8 x i16> truncation stored through %res.
; Both run configurations expect the truncating store form: vpmovqw zmm0 -> (%rdi).
128 define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
129 ; ALL-LABEL: trunc_qw_512_mem:
131 ; ALL-NEXT: vpmovqw %zmm0, (%rdi)
132 ; ALL-NEXT: vzeroupper
134 %x = trunc <8 x i64> %i to <8 x i16>
135 store <8 x i16> %x, <8 x i16>* %res
; <4 x i64> to <4 x i16> truncation, register form.
; The KNL run expects the 512-bit vpmovqw on zmm0; the SKX run expects the
; 256-bit vpmovqw on ymm0.
139 define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
140 ; KNL-LABEL: trunc_qw_256:
142 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
143 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
144 ; KNL-NEXT: vzeroupper
147 ; SKX-LABEL: trunc_qw_256:
149 ; SKX-NEXT: vpmovqw %ymm0, %xmm0
150 ; SKX-NEXT: vzeroupper
152 %x = trunc <4 x i64> %i to <4 x i16>
; <4 x i64> to <4 x i16> truncation stored through %res.
; The KNL run expects vpmovqw into xmm0 and a separate vmovq store; the SKX run
; expects a single vpmovqw from ymm0 straight to memory.
156 define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
157 ; KNL-LABEL: trunc_qw_256_mem:
159 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
160 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
161 ; KNL-NEXT: vmovq %xmm0, (%rdi)
162 ; KNL-NEXT: vzeroupper
165 ; SKX-LABEL: trunc_qw_256_mem:
167 ; SKX-NEXT: vpmovqw %ymm0, (%rdi)
168 ; SKX-NEXT: vzeroupper
170 %x = trunc <4 x i64> %i to <4 x i16>
171 store <4 x i16> %x, <4 x i16>* %res
; <2 x i64> to <2 x i16> truncation, register form.
; The KNL run expects a vpshufd + vpshuflw pair; the SKX run expects a single
; vpshufb.
175 define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
176 ; KNL-LABEL: trunc_qw_128:
178 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
179 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
182 ; SKX-LABEL: trunc_qw_128:
184 ; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
186 %x = trunc <2 x i64> %i to <2 x i16>
; <2 x i64> to <2 x i16> truncation stored through %res.
; The KNL run expects vpshufd + vpshuflw followed by a vmovd store; the SKX run
; expects a single vpmovqw from xmm0 to memory.
190 define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
191 ; KNL-LABEL: trunc_qw_128_mem:
193 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
194 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
195 ; KNL-NEXT: vmovd %xmm0, (%rdi)
198 ; SKX-LABEL: trunc_qw_128_mem:
200 ; SKX-NEXT: vpmovqw %xmm0, (%rdi)
202 %x = trunc <2 x i64> %i to <2 x i16>
203 store <2 x i16> %x, <2 x i16>* %res
; qword -> dword truncation of a 512-bit vector (<8 x i64> to <8 x i32>), register form.
; Both run configurations expect vpmovqd (zmm0 -> ymm0).
207 define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
208 ; ALL-LABEL: trunc_qd_512:
210 ; ALL-NEXT: vpmovqd %zmm0, %ymm0
212 %x = trunc <8 x i64> %i to <8 x i32>
; <8 x i64> to <8 x i32> truncation stored through %res.
; Both run configurations expect the truncating store form: vpmovqd zmm0 -> (%rdi).
216 define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
217 ; ALL-LABEL: trunc_qd_512_mem:
219 ; ALL-NEXT: vpmovqd %zmm0, (%rdi)
220 ; ALL-NEXT: vzeroupper
222 %x = trunc <8 x i64> %i to <8 x i32>
223 store <8 x i32> %x, <8 x i32>* %res
; <4 x i64> to <4 x i32> truncation, register form.
; The KNL run expects the 512-bit vpmovqd (zmm0 -> ymm0); the SKX run expects
; the 256-bit vpmovqd (ymm0 -> xmm0).
227 define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
228 ; KNL-LABEL: trunc_qd_256:
230 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
231 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
232 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
233 ; KNL-NEXT: vzeroupper
236 ; SKX-LABEL: trunc_qd_256:
238 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
239 ; SKX-NEXT: vzeroupper
241 %x = trunc <4 x i64> %i to <4 x i32>
; <4 x i64> to <4 x i32> truncation stored through %res.
; The KNL run expects vpmovqd (zmm0 -> ymm0) and a separate vmovdqa store of
; xmm0; the SKX run expects a single vpmovqd from ymm0 straight to memory.
245 define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
246 ; KNL-LABEL: trunc_qd_256_mem:
248 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
249 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
250 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
251 ; KNL-NEXT: vzeroupper
254 ; SKX-LABEL: trunc_qd_256_mem:
256 ; SKX-NEXT: vpmovqd %ymm0, (%rdi)
257 ; SKX-NEXT: vzeroupper
259 %x = trunc <4 x i64> %i to <4 x i32>
260 store <4 x i32> %x, <4 x i32>* %res
; <2 x i64> to <2 x i32> truncation, register form.
; Both run configurations expect a single vpermilps shuffle.
264 define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
265 ; ALL-LABEL: trunc_qd_128:
267 ; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
269 %x = trunc <2 x i64> %i to <2 x i32>
; <2 x i64> to <2 x i32> truncation stored through %res.
; The KNL run expects vpermilps followed by a vmovlps store; the SKX run
; expects a single vpmovqd from xmm0 to memory.
273 define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
274 ; KNL-LABEL: trunc_qd_128_mem:
276 ; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
277 ; KNL-NEXT: vmovlps %xmm0, (%rdi)
280 ; SKX-LABEL: trunc_qd_128_mem:
282 ; SKX-NEXT: vpmovqd %xmm0, (%rdi)
284 %x = trunc <2 x i64> %i to <2 x i32>
285 store <2 x i32> %x, <2 x i32>* %res
; dword -> byte truncation of a 512-bit vector (<16 x i32> to <16 x i8>), register form.
; Both run configurations expect vpmovdb (zmm0 -> xmm0) followed by vzeroupper.
289 define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
290 ; ALL-LABEL: trunc_db_512:
292 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
293 ; ALL-NEXT: vzeroupper
295 %x = trunc <16 x i32> %i to <16 x i8>
; <16 x i32> to <16 x i8> truncation stored through %res.
; Both run configurations expect the truncating store form: vpmovdb zmm0 -> (%rdi).
299 define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
300 ; ALL-LABEL: trunc_db_512_mem:
302 ; ALL-NEXT: vpmovdb %zmm0, (%rdi)
303 ; ALL-NEXT: vzeroupper
305 %x = trunc <16 x i32> %i to <16 x i8>
306 store <16 x i8> %x, <16 x i8>* %res
; <8 x i32> to <8 x i8> truncation, register form.
; The KNL run expects the 512-bit vpmovdb on zmm0; the SKX run expects the
; 256-bit vpmovdb on ymm0.
310 define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
311 ; KNL-LABEL: trunc_db_256:
313 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
314 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
315 ; KNL-NEXT: vzeroupper
318 ; SKX-LABEL: trunc_db_256:
320 ; SKX-NEXT: vpmovdb %ymm0, %xmm0
321 ; SKX-NEXT: vzeroupper
323 %x = trunc <8 x i32> %i to <8 x i8>
; <8 x i32> to <8 x i8> truncation stored through %res.
; The KNL run expects vpmovdb into xmm0 and a separate vmovq store; the SKX run
; expects a single vpmovdb from ymm0 straight to memory.
327 define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
328 ; KNL-LABEL: trunc_db_256_mem:
330 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
331 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
332 ; KNL-NEXT: vmovq %xmm0, (%rdi)
333 ; KNL-NEXT: vzeroupper
336 ; SKX-LABEL: trunc_db_256_mem:
338 ; SKX-NEXT: vpmovdb %ymm0, (%rdi)
339 ; SKX-NEXT: vzeroupper
341 %x = trunc <8 x i32> %i to <8 x i8>
342 store <8 x i8> %x, <8 x i8>* %res
; <4 x i32> to <4 x i8> truncation, register form.
; Both run configurations expect a vpshufb that gathers source bytes 0, 4, 8 and 12.
346 define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
347 ; ALL-LABEL: trunc_db_128:
349 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
351 %x = trunc <4 x i32> %i to <4 x i8>
; <4 x i32> to <4 x i8> truncation stored through %res.
; The KNL run expects vpshufb followed by a vmovd store; the SKX run expects a
; single vpmovdb from xmm0 to memory.
355 define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
356 ; KNL-LABEL: trunc_db_128_mem:
358 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
359 ; KNL-NEXT: vmovd %xmm0, (%rdi)
362 ; SKX-LABEL: trunc_db_128_mem:
364 ; SKX-NEXT: vpmovdb %xmm0, (%rdi)
366 %x = trunc <4 x i32> %i to <4 x i8>
367 store <4 x i8> %x, <4 x i8>* %res
; dword -> word truncation of a 512-bit vector (<16 x i32> to <16 x i16>), register form.
; Both run configurations expect vpmovdw (zmm0 -> ymm0).
371 define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
372 ; ALL-LABEL: trunc_dw_512:
374 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
376 %x = trunc <16 x i32> %i to <16 x i16>
; <16 x i32> to <16 x i16> truncation stored through %res.
; Both run configurations expect the truncating store form: vpmovdw zmm0 -> (%rdi).
380 define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
381 ; ALL-LABEL: trunc_dw_512_mem:
383 ; ALL-NEXT: vpmovdw %zmm0, (%rdi)
384 ; ALL-NEXT: vzeroupper
386 %x = trunc <16 x i32> %i to <16 x i16>
387 store <16 x i16> %x, <16 x i16>* %res
; <8 x i32> to <8 x i16> truncation, register form.
; The KNL run expects the 512-bit vpmovdw (zmm0 -> ymm0); the SKX run expects
; the 256-bit vpmovdw (ymm0 -> xmm0).
391 define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
392 ; KNL-LABEL: trunc_dw_256:
394 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
395 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
396 ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
397 ; KNL-NEXT: vzeroupper
400 ; SKX-LABEL: trunc_dw_256:
402 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
403 ; SKX-NEXT: vzeroupper
405 %x = trunc <8 x i32> %i to <8 x i16>
; <8 x i32> to <8 x i16> truncation stored through %res.
; The KNL run expects vpmovdw (zmm0 -> ymm0) and a separate vmovdqa store of
; xmm0; the SKX run expects a single vpmovdw from ymm0 straight to memory.
409 define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
410 ; KNL-LABEL: trunc_dw_256_mem:
412 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
413 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
414 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
415 ; KNL-NEXT: vzeroupper
418 ; SKX-LABEL: trunc_dw_256_mem:
420 ; SKX-NEXT: vpmovdw %ymm0, (%rdi)
421 ; SKX-NEXT: vzeroupper
423 %x = trunc <8 x i32> %i to <8 x i16>
424 store <8 x i16> %x, <8 x i16>* %res
; <4 x i32> to <4 x i16> truncation stored through %res.
; The KNL run expects vpshufb followed by a vmovq store; the SKX run expects a
; single vpmovdw from xmm0 to memory.
428 define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
429 ; KNL-LABEL: trunc_dw_128_mem:
431 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
432 ; KNL-NEXT: vmovq %xmm0, (%rdi)
435 ; SKX-LABEL: trunc_dw_128_mem:
437 ; SKX-NEXT: vpmovdw %xmm0, (%rdi)
439 %x = trunc <4 x i32> %i to <4 x i16>
440 store <4 x i16> %x, <4 x i16>* %res
; word -> byte truncation of a 512-bit vector (<32 x i16> to <32 x i8>), register form.
; The KNL run expects the input to be split into two 256-bit halves, each
; zero-extended to dwords (vpmovzxwd) and truncated with vpmovdb, then
; recombined with vinserti128. The SKX run expects a single vpmovwb.
444 define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
445 ; KNL-LABEL: trunc_wb_512:
447 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
448 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
449 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
450 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
451 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
452 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
455 ; SKX-LABEL: trunc_wb_512:
457 ; SKX-NEXT: vpmovwb %zmm0, %ymm0
459 %x = trunc <32 x i16> %i to <32 x i8>
; <32 x i16> to <32 x i8> truncation stored through %res.
; The KNL run expects two vpmovzxwd + vpmovdb pairs, storing the upper half at
; 16(%rdi) and the lower half at (%rdi). The SKX run expects a single vpmovwb
; truncating store.
463 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
464 ; KNL-LABEL: trunc_wb_512_mem:
466 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
467 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
468 ; KNL-NEXT: vpmovdb %zmm1, 16(%rdi)
469 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
470 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
471 ; KNL-NEXT: vzeroupper
474 ; SKX-LABEL: trunc_wb_512_mem:
476 ; SKX-NEXT: vpmovwb %zmm0, (%rdi)
477 ; SKX-NEXT: vzeroupper
479 %x = trunc <32 x i16> %i to <32 x i8>
480 store <32 x i8> %x, <32 x i8>* %res
; <16 x i16> to <16 x i8> truncation, register form.
; The KNL run expects a zero-extension to dwords (vpmovzxwd) followed by
; vpmovdb; the SKX run expects a single vpmovwb (ymm0 -> xmm0).
484 define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
485 ; KNL-LABEL: trunc_wb_256:
487 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
488 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
489 ; KNL-NEXT: vzeroupper
492 ; SKX-LABEL: trunc_wb_256:
494 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
495 ; SKX-NEXT: vzeroupper
497 %x = trunc <16 x i16> %i to <16 x i8>
; <16 x i16> to <16 x i8> truncation stored through %res.
; The KNL run expects vpmovzxwd followed by a vpmovdb truncating store; the SKX
; run expects a single vpmovwb truncating store from ymm0.
501 define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
502 ; KNL-LABEL: trunc_wb_256_mem:
504 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
505 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
506 ; KNL-NEXT: vzeroupper
509 ; SKX-LABEL: trunc_wb_256_mem:
511 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
512 ; SKX-NEXT: vzeroupper
514 %x = trunc <16 x i16> %i to <16 x i8>
515 store <16 x i8> %x, <16 x i8>* %res
; <16 x i16> to <16 x i8> truncation whose result is stored through %res in a
; function that also has a <16 x i8> return type.
; Both runs expect the truncation to go to xmm0 first and then be stored with a
; plain vmovdqa, rather than using the truncating store form.
519 define <16 x i8> @trunc_wb_256_mem_and_ret(<16 x i16> %i, <16 x i8>* %res) #0 {
520 ; KNL-LABEL: trunc_wb_256_mem_and_ret:
522 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
523 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
524 ; KNL-NEXT: vmovdqa %xmm0, (%rdi)
525 ; KNL-NEXT: vzeroupper
528 ; SKX-LABEL: trunc_wb_256_mem_and_ret:
530 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
531 ; SKX-NEXT: vmovdqa %xmm0, (%rdi)
532 ; SKX-NEXT: vzeroupper
534 %x = trunc <16 x i16> %i to <16 x i8>
535 store <16 x i8> %x, <16 x i8>* %res
; <8 x i16> to <8 x i8> truncation, register form.
; Both run configurations expect a vpshufb that gathers the even source bytes.
539 define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
540 ; ALL-LABEL: trunc_wb_128:
542 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
544 %x = trunc <8 x i16> %i to <8 x i8>
; <8 x i16> to <8 x i8> truncation stored through %res.
; The KNL run expects vpshufb followed by a vmovq store; the SKX run expects a
; single vpmovwb from xmm0 to memory.
548 define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
549 ; KNL-LABEL: trunc_wb_128_mem:
551 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
552 ; KNL-NEXT: vmovq %xmm0, (%rdi)
555 ; SKX-LABEL: trunc_wb_128_mem:
557 ; SKX-NEXT: vpmovwb %xmm0, (%rdi)
559 %x = trunc <8 x i16> %i to <8 x i8>
560 store <8 x i8> %x, <8 x i8>* %res
; Unsigned-saturating truncation: each i16 lane is clamped to 255 with
; icmp ult + select, truncated to i8, and stored through %res (align 1).
; The KNL run expects vpminuw against a constant, then vpmovzxwd and a vpmovdb
; store; the SKX run expects a single vpmovuswb store.
565 define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
566 ; KNL-LABEL: usat_trunc_wb_256_mem:
568 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
569 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
570 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
571 ; KNL-NEXT: vzeroupper
574 ; SKX-LABEL: usat_trunc_wb_256_mem:
576 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
577 ; SKX-NEXT: vzeroupper
579 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
580 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
581 %x6 = trunc <16 x i16> %x5 to <16 x i8>
582 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating truncation, register form: each i16 lane is clamped to
; 255 with icmp ult + select and truncated to i8.
; The KNL run expects vpminuw, vpmovzxwd and vpmovdb; the SKX run expects a
; single vpmovuswb (ymm0 -> xmm0).
586 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
587 ; KNL-LABEL: usat_trunc_wb_256:
589 ; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
590 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
591 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
592 ; KNL-NEXT: vzeroupper
595 ; SKX-LABEL: usat_trunc_wb_256:
597 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
598 ; SKX-NEXT: vzeroupper
600 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
601 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
602 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Unsigned-saturating truncation: each i16 lane of a 128-bit vector is clamped
; to 255 with icmp ult + select, truncated to i8, and stored through %res.
; The KNL run expects vpminuw, vpackuswb and a vmovq store; the SKX run expects
; a single vpmovuswb store.
606 define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
607 ; KNL-LABEL: usat_trunc_wb_128_mem:
609 ; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
610 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
611 ; KNL-NEXT: vmovq %xmm0, (%rdi)
614 ; SKX-LABEL: usat_trunc_wb_128_mem:
616 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
618 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
619 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
620 %x6 = trunc <8 x i16> %x5 to <8 x i8>
621 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating dword -> byte truncation: each i32 lane is clamped to 255
; with icmp ult + select, truncated to i8, and stored through %res.
; Both run configurations expect a single vpmovusdb truncating store.
625 define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
626 ; ALL-LABEL: usat_trunc_db_512_mem:
628 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
629 ; ALL-NEXT: vzeroupper
631 %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
632 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
633 %x6 = trunc <16 x i32> %x5 to <16 x i8>
634 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Unsigned-saturating qword -> byte truncation: each i64 lane is clamped to 255
; with icmp ult + select, truncated to i8, and stored through %res.
; Both run configurations expect a single vpmovusqb truncating store.
638 define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
639 ; ALL-LABEL: usat_trunc_qb_512_mem:
641 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
642 ; ALL-NEXT: vzeroupper
644 %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
645 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
646 %x6 = trunc <8 x i64> %x5 to <8 x i8>
647 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Unsigned-saturating qword -> dword truncation: each i64 lane is clamped to
; 4294967295 with icmp ult + select, truncated to i32, and stored through %res.
; Both run configurations expect a single vpmovusqd truncating store.
651 define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
652 ; ALL-LABEL: usat_trunc_qd_512_mem:
654 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
655 ; ALL-NEXT: vzeroupper
657 %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
658 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
659 %x6 = trunc <8 x i64> %x5 to <8 x i32>
660 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Unsigned-saturating qword -> word truncation: each i64 lane is clamped to
; 65535 with icmp ult + select, truncated to i16, and stored through %res.
; Both run configurations expect a single vpmovusqw truncating store.
664 define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
665 ; ALL-LABEL: usat_trunc_qw_512_mem:
667 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
668 ; ALL-NEXT: vzeroupper
670 %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
671 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
672 %x6 = trunc <8 x i64> %x5 to <8 x i16>
673 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Unsigned-saturating dword -> byte truncation of a 1024-bit input
; (<32 x i32>, arriving in zmm0 and zmm1 per the expected assembly).
; Both run configurations expect one vpmovusdb per 512-bit half, with the two
; 128-bit results combined by vinserti128.
677 define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
678 ; ALL-LABEL: usat_trunc_db_1024:
680 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
681 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
682 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
684 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
685 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
686 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Unsigned-saturating dword -> byte truncation of a 1024-bit input, stored
; through %p (align 1).
; Both run configurations expect two vpmovusdb truncating stores: zmm1 to
; 16(%rdi) and zmm0 to (%rdi).
690 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
691 ; ALL-LABEL: usat_trunc_db_1024_mem:
693 ; ALL-NEXT: vpmovusdb %zmm1, 16(%rdi)
694 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
695 ; ALL-NEXT: vzeroupper
697 %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
698 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
699 %x6 = trunc <32 x i32> %x5 to <32 x i8>
700 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Unsigned-saturating dword -> word truncation, register form: each i32 lane is
; clamped to 65535 with icmp ult + select and truncated to i16.
; Both run configurations expect a single vpmovusdw (zmm0 -> ymm0).
704 define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
705 ; ALL-LABEL: usat_trunc_dw_512:
707 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
709 %x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
710 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
711 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Unsigned-saturating word -> byte truncation of a 128-bit vector, register
; form: each i16 lane is clamped to 255 with icmp ult + select, then truncated.
; The KNL run expects vpminuw followed by vpackuswb; the SKX run expects a
; single vpmovuswb.
715 define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
716 ; KNL-LABEL: usat_trunc_wb_128:
718 ; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
719 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
722 ; SKX-LABEL: usat_trunc_wb_128:
724 ; SKX-NEXT: vpmovuswb %xmm0, %xmm0
726 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
727 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
728 %x6 = trunc <8 x i16> %x5 to <8 x i8>
; Unsigned-saturating qword -> word truncation of a 1024-bit input
; (<16 x i64>): each lane is clamped to 65535 with icmp ult + select, then
; truncated to i16.
; Both run configurations expect one vpmovusqw per 512-bit half, with the two
; 128-bit results combined by vinserti128.
732 define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
733 ; ALL-LABEL: usat_trunc_qw_1024:
735 ; ALL-NEXT: vpmovusqw %zmm0, %xmm0
736 ; ALL-NEXT: vpmovusqw %zmm1, %xmm1
737 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
739 %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
740 %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
741 %x6 = trunc <16 x i64> %x5 to <16 x i16>
; Unsigned-saturating dword -> byte truncation of a 256-bit vector whose
; <8 x i8> result is widened to <16 x i8> by a shufflevector; mask indices
; 8-15 select from the undef second operand, so the upper eight lanes are
; undefined.
; The KNL run expects the 512-bit vpmovusdb on zmm0; the SKX run expects the
; 256-bit vpmovusdb on ymm0.
745 define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
746 ; KNL-LABEL: usat_trunc_db_256:
748 ; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
749 ; KNL-NEXT: vpmovusdb %zmm0, %xmm0
750 ; KNL-NEXT: vzeroupper
753 ; SKX-LABEL: usat_trunc_db_256:
755 ; SKX-NEXT: vpmovusdb %ymm0, %xmm0
756 ; SKX-NEXT: vzeroupper
758 %tmp1 = icmp ult <8 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
759 %tmp2 = select <8 x i1> %tmp1, <8 x i32> %x, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
760 %tmp3 = trunc <8 x i32> %tmp2 to <8 x i8>
761 %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
767 ; Tests for the following unsigned saturation pattern:
769 ; %a = icmp sgt %x, C1
770 ; %b = select %a, %x, C1
771 ; %c = icmp slt %b, C2
772 ; %d = select %c, %b, C2
; Signed clamp then truncate: each i16 lane is raised to at least 0
; (icmp sgt + select), capped at 255 (icmp slt + select), truncated to i8,
; and stored through %res.
; The KNL run expects vextracti128 + vpackuswb and a vmovdqu store; the SKX run
; expects vpmaxsw against zero followed by a vpmovuswb truncating store.
776 define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, <16 x i8>* %res) {
777 ; KNL-LABEL: smax_usat_trunc_wb_256_mem1:
779 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
780 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
781 ; KNL-NEXT: vmovdqu %xmm0, (%rdi)
782 ; KNL-NEXT: vzeroupper
785 ; SKX-LABEL: smax_usat_trunc_wb_256_mem1:
787 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
788 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
789 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
790 ; SKX-NEXT: vzeroupper
792 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
793 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
794 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
795 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
796 %x6 = trunc <16 x i16> %x5 to <16 x i8>
797 store <16 x i8> %x6, <16 x i8>* %res, align 1
801 ; Test for smax(smin(x, C2), C1).
; Here the clamp order is reversed relative to the mem1 variant: lanes are
; first capped at 255 (icmp slt + select) and then raised to at least 0
; (icmp sgt + select) before the truncating store. The expected assembly for
; both runs is the same as for the mem1 variant.
802 define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, <16 x i8>* %res) {
803 ; KNL-LABEL: smax_usat_trunc_wb_256_mem2:
805 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
806 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
807 ; KNL-NEXT: vmovdqu %xmm0, (%rdi)
808 ; KNL-NEXT: vzeroupper
811 ; SKX-LABEL: smax_usat_trunc_wb_256_mem2:
813 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
814 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
815 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
816 ; SKX-NEXT: vzeroupper
818 %x1 = icmp slt <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
819 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
820 %x3 = icmp sgt <16 x i16> %x2, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
821 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
822 %x6 = trunc <16 x i16> %x5 to <16 x i8>
823 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Register form of the signed clamp-to-[0, 255] then truncate pattern on
; <16 x i16> (icmp sgt + select against 0, then icmp slt + select against 255).
; The KNL run expects vextracti128 + vpackuswb; the SKX run expects vpmaxsw
; against zero followed by vpmovuswb (ymm0 -> xmm0).
827 define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) {
828 ; KNL-LABEL: smax_usat_trunc_wb_256:
830 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
831 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
832 ; KNL-NEXT: vzeroupper
835 ; SKX-LABEL: smax_usat_trunc_wb_256:
837 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
838 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
839 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
840 ; SKX-NEXT: vzeroupper
842 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
843 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
844 %x3 = icmp slt <16 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
845 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
846 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Signed clamp-to-[0, 255] then truncate on <8 x i16>, stored through %res.
; The KNL run expects vpackuswb followed by a vmovq store; the SKX run expects
; vpmaxsw against zero followed by a vpmovuswb truncating store.
850 define void @smax_usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
851 ; KNL-LABEL: smax_usat_trunc_wb_128_mem:
853 ; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
854 ; KNL-NEXT: vmovq %xmm0, (%rdi)
857 ; SKX-LABEL: smax_usat_trunc_wb_128_mem:
859 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
860 ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
861 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
863 %x1 = icmp sgt <8 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
864 %x2 = select <8 x i1> %x1, <8 x i16> %i, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
865 %x3 = icmp slt <8 x i16> %x2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
866 %x5 = select <8 x i1> %x3, <8 x i16> %x2, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
867 %x6 = trunc <8 x i16> %x5 to <8 x i8>
868 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Signed clamp-to-[0, 255] then dword -> byte truncation on <16 x i32>, stored
; through %res.
; Both run configurations expect vpmaxsd against a zeroed register followed by
; a vpmovusdb truncating store.
872 define void @smax_usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
873 ; ALL-LABEL: smax_usat_trunc_db_512_mem:
875 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
876 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
877 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
878 ; ALL-NEXT: vzeroupper
880 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
881 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
882 %x3 = icmp slt <16 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
883 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
884 %x6 = trunc <16 x i32> %x5 to <16 x i8>
885 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Signed clamp-to-[0, 255] then qword -> byte truncation on <8 x i64>, stored
; through %res.
; Both run configurations expect vpmaxsq against a zeroed register followed by
; a vpmovusqb truncating store.
889 define void @smax_usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
890 ; ALL-LABEL: smax_usat_trunc_qb_512_mem:
892 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
893 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
894 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
895 ; ALL-NEXT: vzeroupper
897 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
898 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
899 %x3 = icmp slt <8 x i64> %x2, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
900 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
901 %x6 = trunc <8 x i64> %x5 to <8 x i8>
902 store <8 x i8> %x6, <8 x i8>* %res, align 1
; Signed clamp-to-[0, 4294967295] then qword -> dword truncation on <8 x i64>,
; stored through %res.
; Both run configurations expect vpmaxsq against a zeroed register followed by
; a vpmovusqd truncating store.
906 define void @smax_usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
907 ; ALL-LABEL: smax_usat_trunc_qd_512_mem:
909 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
910 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
911 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
912 ; ALL-NEXT: vzeroupper
914 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
915 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
916 %x3 = icmp slt <8 x i64> %x2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
917 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
918 %x6 = trunc <8 x i64> %x5 to <8 x i32>
919 store <8 x i32> %x6, <8 x i32>* %res, align 1
; Signed clamp-to-[0, 65535] then qword -> word truncation on <8 x i64>, stored
; through %res.
; Both run configurations expect vpmaxsq against a zeroed register followed by
; a vpmovusqw truncating store.
923 define void @smax_usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
924 ; ALL-LABEL: smax_usat_trunc_qw_512_mem:
926 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
927 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
928 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
929 ; ALL-NEXT: vzeroupper
931 %x1 = icmp sgt <8 x i64> %i, <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
932 %x2 = select <8 x i1> %x1, <8 x i64> %i, <8 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>
933 %x3 = icmp slt <8 x i64> %x2, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
934 %x5 = select <8 x i1> %x3, <8 x i64> %x2, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
935 %x6 = trunc <8 x i64> %x5 to <8 x i16>
936 store <8 x i16> %x6, <8 x i16>* %res, align 1
; Clamps each i32 lane of the 32-lane input %i to [0, 255] using signed
; compare/select pairs and truncates the result to <32 x i8> (%x6).
; The checks, shared by both llc configurations, expect the input to be
; handled as two zmm halves: one vpmaxsd per half against a zeroed register,
; one vpmovusdb per half into an xmm register, and a vinserti128 that joins
; the two 128-bit results into ymm0. No separate min instruction is expected.
940 define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) {
941 ; ALL-LABEL: smax_usat_trunc_db_1024:
943 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
944 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
945 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
946 ; ALL-NEXT: vpmovusdb %zmm0, %xmm0
947 ; ALL-NEXT: vpmovusdb %zmm1, %xmm1
948 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
950 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
951 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
952 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
953 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
954 %x6 = trunc <32 x i32> %x5 to <32 x i8>
; Store variant of the 32-lane [0, 255] clamp: each i32 lane of %i is clamped
; with signed compare/select pairs, truncated to i8, and the <32 x i8> result
; is stored to %p (align 1).
; The checks, shared by both llc configurations, expect one vpmaxsd per zmm
; half against a zeroed register and two vpmovusdb stores, one to 16(%rdi)
; for zmm1 and one to (%rdi) for zmm0. No separate min instruction is
; expected.
958 define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
959 ; ALL-LABEL: smax_usat_trunc_db_1024_mem:
961 ; ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
962 ; ALL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
963 ; ALL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
964 ; ALL-NEXT: vpmovusdb %zmm1, 16(%rdi)
965 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
966 ; ALL-NEXT: vzeroupper
968 %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
969 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
970 %x3 = icmp slt <32 x i32> %x2, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
971 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
972 %x6 = trunc <32 x i32> %x5 to <32 x i8>
973 store <32 x i8>%x6, <32 x i8>* %p, align 1
; Clamps each i32 lane of %i to [0, 65535] using signed compare/select pairs
; and truncates the result to <16 x i16> (%x6).
; The checks, shared by both llc configurations, expect a zeroed register,
; vpmaxsd for the lower bound and a vpmovusdw from zmm0 into ymm0; no
; separate min instruction is expected for the upper bound.
977 define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) {
978 ; ALL-LABEL: smax_usat_trunc_dw_512:
980 ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
981 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
982 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
984 %x1 = icmp sgt <16 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
985 %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
986 %x3 = icmp slt <16 x i32> %x2, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
987 %x5 = select <16 x i1> %x3, <16 x i32> %x2, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
988 %x6 = trunc <16 x i32> %x5 to <16 x i16>
; Negative test: same max/min/trunc/store shape as the smax_usat tests, but
; the upper bound of the signed min is i16 -1 rather than a positive limit.
; The checks expect both clamps to stay as explicit instructions (vpmaxsw
; against zero, vpminsw against an all-ones register built with vpcmpeqd) and
; the narrowing to be a plain truncate: vpmovzxwd + vpmovdb on the KNL
; configuration, vpmovwb on the SKX configuration. No vpmovus* instruction is
; expected.
; NOTE(review): presumably the fold is rejected because -1 is not a valid
; unsigned-saturation bound under a signed compare; confirm against the
; lowering code.
992 define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
993 ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
995 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
996 ; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
997 ; KNL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
998 ; KNL-NEXT: vpminsw %ymm1, %ymm0, %ymm0
999 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1000 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
1001 ; KNL-NEXT: vzeroupper
1004 ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
1006 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1007 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
1008 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
1009 ; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1010 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1011 ; SKX-NEXT: vzeroupper
1013 %x1 = icmp sgt <16 x i16> %i, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
1014 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
1015 %x3 = icmp slt <16 x i16> %x2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1016 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1017 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1018 store <16 x i8> %x6, <16 x i8>* %res, align 1
; Negative test: same max/min/trunc/store shape as the smax_usat tests, but
; both bounds are negative (signed max with -10, then signed min with -5).
; The checks expect vpmaxsw and vpminsw with RIP-relative constant operands
; to remain, followed by a plain truncate: vpmovzxwd + vpmovdb on the KNL
; configuration, vpmovwb on the SKX configuration. No vpmovus* instruction is
; expected.
; NOTE(review): presumably the fold is rejected because the clamp range is
; not [0, unsigned max of i8]; confirm against the lowering code.
1022 define void @negative_test2_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
1023 ; KNL-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1025 ; KNL-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1026 ; KNL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1027 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1028 ; KNL-NEXT: vpmovdb %zmm0, (%rdi)
1029 ; KNL-NEXT: vzeroupper
1032 ; SKX-LABEL: negative_test2_smax_usat_trunc_wb_256_mem:
1034 ; SKX-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
1035 ; SKX-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0
1036 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
1037 ; SKX-NEXT: vzeroupper
1039 %x1 = icmp sgt <16 x i16> %i, <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1040 %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> <i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10, i16 -10>
1041 %x3 = icmp slt <16 x i16> %x2, <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1042 %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> <i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5, i16 -5>
1043 %x6 = trunc <16 x i16> %x5 to <16 x i8>
1044 store <16 x i8> %x6, <16 x i8>* %res, align 1
1048 define void @ssat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
1049 ; ALL-LABEL: ssat_trunc_db_1024_mem:
1051 ; ALL-NEXT: vpmovsdb %zmm1, 16(%rdi)
1052 ; ALL-NEXT: vpmovsdb %zmm0, (%rdi)
1053 ; ALL-NEXT: vzeroupper
1055 %x1 = icmp sgt <32 x i32> %i, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32
1057 %x2 = select <32 x i1> %x1, <32 x i32> %i, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
1058 %x3 = icmp slt <32 x i32> %x2, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
1059 %x5 = select <32 x i1> %x3, <32 x i32> %x2, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
1060 %x6 = trunc <32 x i32> %x5 to <32 x i8>
1061 store <32 x i8>%x6, <32 x i8>* %p, align 1