1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; Vector case: shift each i64 lane left by 7, then truncate to i32.
; Loads a 4 x i64 vector from %in and stores a 4 x i32 vector to %out
; (both pointers are in addrspace(1)).
; Both run configurations expect the shift to be performed on 32-bit lanes
; (pslld / vpslld by 7) after a shufps that keeps dword elements 0 and 2 of
; each 128-bit half; no 64-bit vector shift appears in the expected output.
5 define void @trunc_shl_7_v4i32_v4i64(<4 x i32> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
6 ; SSE2-LABEL: trunc_shl_7_v4i32_v4i64:
8 ; SSE2-NEXT: movaps (%rsi), %xmm0
9 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
10 ; SSE2-NEXT: pslld $7, %xmm0
11 ; SSE2-NEXT: movdqa %xmm0, (%rdi)
14 ; AVX2-LABEL: trunc_shl_7_v4i32_v4i64:
16 ; AVX2-NEXT: vmovaps (%rsi), %xmm0
17 ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
18 ; AVX2-NEXT: vpslld $7, %xmm0, %xmm0
19 ; AVX2-NEXT: vmovdqa %xmm0, (%rdi)
; IR under test: load, shl by 7 in every lane, trunc to i32 lanes, store.
21 %val = load <4 x i64>, <4 x i64> addrspace(1)* %in
22 %shl = shl <4 x i64> %val, <i64 7, i64 7, i64 7, i64 7>
23 %trunc = trunc <4 x i64> %shl to <4 x i32>
24 store <4 x i32> %trunc, <4 x i32> addrspace(1)* %out
; Shift each i32 lane of %a left by 15, then truncate to i16. The shift
; amount is one less than the 16-bit destination width.
; Expected output narrows first and then shifts 16-bit lanes by 15
; (psllw / vpsllw): the SSE2 configuration narrows with pslld/psrad by 16
; followed by packssdw, the AVX2 configuration with vpshufb followed by vpermq.
28 define <8 x i16> @trunc_shl_15_v8i16_v8i32(<8 x i32> %a) {
29 ; SSE2-LABEL: trunc_shl_15_v8i16_v8i32:
31 ; SSE2-NEXT: pslld $16, %xmm1
32 ; SSE2-NEXT: psrad $16, %xmm1
33 ; SSE2-NEXT: pslld $16, %xmm0
34 ; SSE2-NEXT: psrad $16, %xmm0
35 ; SSE2-NEXT: packssdw %xmm1, %xmm0
36 ; SSE2-NEXT: psllw $15, %xmm0
39 ; AVX2-LABEL: trunc_shl_15_v8i16_v8i32:
41 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
42 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
43 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
44 ; AVX2-NEXT: vzeroupper
; IR under test: shl by 15 in every i32 lane, then trunc to i16 lanes.
46 %shl = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
47 %conv = trunc <8 x i32> %shl to <8 x i16>
; Shift each i32 lane of %a left by 16, then truncate to i16. The shift
; amount equals the destination width, so the 16 bits kept by the truncation
; are all zeros shifted in; both configurations expect %xmm0 to simply be
; zeroed (xorps / vxorps) with no shift instruction.
51 define <8 x i16> @trunc_shl_16_v8i16_v8i32(<8 x i32> %a) {
52 ; SSE2-LABEL: trunc_shl_16_v8i16_v8i32:
54 ; SSE2-NEXT: xorps %xmm0, %xmm0
57 ; AVX2-LABEL: trunc_shl_16_v8i16_v8i32:
59 ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; IR under test: shl by 16 in every i32 lane, then trunc to i16 lanes.
61 %shl = shl <8 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
62 %conv = trunc <8 x i32> %shl to <8 x i16>
; Shift each i32 lane of %a left by 17, then truncate to i16. The shift
; amount exceeds the 16-bit destination width, so the truncated result is
; all zeros; both configurations expect only a zeroing of %xmm0
; (xorps / vxorps).
66 define <8 x i16> @trunc_shl_17_v8i16_v8i32(<8 x i32> %a) {
67 ; SSE2-LABEL: trunc_shl_17_v8i16_v8i32:
69 ; SSE2-NEXT: xorps %xmm0, %xmm0
72 ; AVX2-LABEL: trunc_shl_17_v8i16_v8i32:
74 ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; IR under test: shl by 17 in every i32 lane, then trunc to i16 lanes.
76 %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
77 %conv = trunc <8 x i32> %shl to <8 x i16>
; Scalar case: load an i64 from %in, shift left by 31, truncate to i32 and
; store to %out. The shift amount is one less than the 32-bit destination
; width. Both configurations expect the whole sequence to be done at 32 bits:
; a 32-bit load (movl), shll by 31, and a 32-bit store.
81 define void @trunc_shl_31_i32_i64(i32* %out, i64* %in) {
82 ; SSE2-LABEL: trunc_shl_31_i32_i64:
84 ; SSE2-NEXT: movl (%rsi), %eax
85 ; SSE2-NEXT: shll $31, %eax
86 ; SSE2-NEXT: movl %eax, (%rdi)
89 ; AVX2-LABEL: trunc_shl_31_i32_i64:
91 ; AVX2-NEXT: movl (%rsi), %eax
92 ; AVX2-NEXT: shll $31, %eax
93 ; AVX2-NEXT: movl %eax, (%rdi)
; IR under test: i64 load, shl by 31, trunc to i32, store.
95 %val = load i64, i64* %in
96 %shl = shl i64 %val, 31
97 %trunc = trunc i64 %shl to i32
98 store i32 %trunc, i32* %out
; Scalar case: load an i64 from %in, shift left by 32, truncate to i32 and
; store to %out. The shift amount equals the 32-bit destination width, so the
; truncated value is always zero; the expected instruction in both
; configurations is a store of immediate 0 to %out (movl).
102 define void @trunc_shl_32_i32_i64(i32* %out, i64* %in) {
103 ; SSE2-LABEL: trunc_shl_32_i32_i64:
105 ; SSE2-NEXT: movl $0, (%rdi)
108 ; AVX2-LABEL: trunc_shl_32_i32_i64:
110 ; AVX2-NEXT: movl $0, (%rdi)
; IR under test: i64 load, shl by 32, trunc to i32, store.
112 %val = load i64, i64* %in
113 %shl = shl i64 %val, 32
114 %trunc = trunc i64 %shl to i32
115 store i32 %trunc, i32* %out
; Scalar case: load an i64 from %in, shift left by 15, truncate to i16 and
; store to %out. The shift amount is one less than the 16-bit destination
; width. Both configurations expect a 32-bit load (movl), shll by 15 on %eax,
; and a 16-bit store of %ax (movw); no 64-bit operation is expected.
119 define void @trunc_shl_15_i16_i64(i16* %out, i64* %in) {
120 ; SSE2-LABEL: trunc_shl_15_i16_i64:
122 ; SSE2-NEXT: movl (%rsi), %eax
123 ; SSE2-NEXT: shll $15, %eax
124 ; SSE2-NEXT: movw %ax, (%rdi)
127 ; AVX2-LABEL: trunc_shl_15_i16_i64:
129 ; AVX2-NEXT: movl (%rsi), %eax
130 ; AVX2-NEXT: shll $15, %eax
131 ; AVX2-NEXT: movw %ax, (%rdi)
; IR under test: i64 load, shl by 15, trunc to i16, store.
133 %val = load i64, i64* %in
134 %shl = shl i64 %val, 15
135 %trunc = trunc i64 %shl to i16
136 store i16 %trunc, i16* %out
; Scalar case: load an i64 from %in, shift left by 16, truncate to i16 and
; store to %out. The shift amount equals the 16-bit destination width, so the
; truncated value is always zero; the expected instruction in both
; configurations is a 16-bit store of immediate 0 to %out (movw).
140 define void @trunc_shl_16_i16_i64(i16* %out, i64* %in) {
141 ; SSE2-LABEL: trunc_shl_16_i16_i64:
143 ; SSE2-NEXT: movw $0, (%rdi)
146 ; AVX2-LABEL: trunc_shl_16_i16_i64:
148 ; AVX2-NEXT: movw $0, (%rdi)
; IR under test: i64 load, shl by 16, trunc to i16, store.
150 %val = load i64, i64* %in
151 %shl = shl i64 %val, 16
152 %trunc = trunc i64 %shl to i16
153 store i16 %trunc, i16* %out
; Scalar case: load an i64 from %in, shift left by 7, truncate to i8 and
; store to %out. The shift amount is one less than the 8-bit destination
; width. Both configurations expect the whole sequence at byte width: a byte
; load (movb), shlb by 7 on %al, and a byte store.
157 define void @trunc_shl_7_i8_i64(i8* %out, i64* %in) {
158 ; SSE2-LABEL: trunc_shl_7_i8_i64:
160 ; SSE2-NEXT: movb (%rsi), %al
161 ; SSE2-NEXT: shlb $7, %al
162 ; SSE2-NEXT: movb %al, (%rdi)
165 ; AVX2-LABEL: trunc_shl_7_i8_i64:
167 ; AVX2-NEXT: movb (%rsi), %al
168 ; AVX2-NEXT: shlb $7, %al
169 ; AVX2-NEXT: movb %al, (%rdi)
; IR under test: i64 load, shl by 7, trunc to i8, store.
171 %val = load i64, i64* %in
172 %shl = shl i64 %val, 7
173 %trunc = trunc i64 %shl to i8
174 store i8 %trunc, i8* %out
; Scalar case: load an i64 from %in, shift left by 8, truncate to i8 and
; store to %out. The shift amount equals the 8-bit destination width, so the
; truncated value is always zero; the expected instruction in both
; configurations is a byte store of immediate 0 to %out (movb).
178 define void @trunc_shl_8_i8_i64(i8* %out, i64* %in) {
179 ; SSE2-LABEL: trunc_shl_8_i8_i64:
181 ; SSE2-NEXT: movb $0, (%rdi)
184 ; AVX2-LABEL: trunc_shl_8_i8_i64:
186 ; AVX2-NEXT: movb $0, (%rdi)
; IR under test: i64 load, shl by 8, trunc to i8, store.
188 %val = load i64, i64* %in
189 %shl = shl i64 %val, 8
190 %trunc = trunc i64 %shl to i8
191 store i8 %trunc, i8* %out