1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
5 ; The point of this test is to ensure that vpackus* is not used by itself (without a preceding unsigned-min clamp) for the umin+trunc combination, since vpackus* treats its input as a signed number.
; Unsigned-saturating truncation of <16 x i16> to <16 x i8>: every lane is
; clamped to 255 with an unsigned compare+select and then truncated.
; The plain AVX run expects an explicit vpminuw clamp on each 128-bit half
; before vpackuswb; the AVX512 run expects a single vpmovuswb.
7 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
8 ; AVX-LABEL: usat_trunc_wb_256:
10 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
11 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
12 ; AVX-NEXT: vpminuw %xmm2, %xmm1, %xmm1
13 ; AVX-NEXT: vpminuw %xmm2, %xmm0, %xmm0
14 ; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
15 ; AVX-NEXT: vzeroupper
18 ; AVX512-LABEL: usat_trunc_wb_256:
20 ; AVX512-NEXT: vpmovuswb %ymm0, %xmm0
21 ; AVX512-NEXT: vzeroupper
; Unsigned min against 255, written as icmp ult + select: lanes below 255 are
; kept, all others become 255.
23 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
24 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
; Narrow the clamped lanes from i16 to i8.
25 %x6 = trunc <16 x i16> %x5 to <16 x i8>
; Unsigned-saturating truncation of <8 x i32> to <8 x i16>: every lane is
; clamped to 65535 with an unsigned compare+select and then truncated.
; The plain AVX run expects an explicit vpminud clamp on each 128-bit half
; before vpackusdw; the AVX512 run expects a single vpmovusdw.
29 define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
30 ; AVX-LABEL: usat_trunc_dw_256:
32 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
33 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [65535,65535,65535,65535]
34 ; AVX-NEXT: vpminud %xmm2, %xmm1, %xmm1
35 ; AVX-NEXT: vpminud %xmm2, %xmm0, %xmm0
36 ; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
37 ; AVX-NEXT: vzeroupper
40 ; AVX512-LABEL: usat_trunc_dw_256:
42 ; AVX512-NEXT: vpmovusdw %ymm0, %xmm0
43 ; AVX512-NEXT: vzeroupper
; Unsigned min against 65535, written as icmp ult + select: lanes below 65535
; are kept, all others become 65535.
45 %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
46 %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
; Narrow the clamped lanes from i32 to i16.
47 %x6 = trunc <8 x i32> %x5 to <8 x i16>