; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL

define <8 x i32> @trunc8i64_8i32_nsw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i32_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i32_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: trunc8i64_8i32_nsw:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nsw:
; AVX2-FAST-ALL:       # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT:    retq
;
; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nsw:
; AVX2-FAST-PERLANE:       # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i32_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

define <8 x i32> @trunc8i64_8i32_nuw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i32_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i32_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: trunc8i64_8i32_nuw:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nuw:
; AVX2-FAST-ALL:       # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT:    retq
;
; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nuw:
; AVX2-FAST-PERLANE:       # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i32_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

define <8 x i16> @trunc8i64_8i16_nsw(<8 x i64> %a) {
; SSE2-SSSE3-LABEL: trunc8i64_8i16_nsw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm2
; SSE2-SSSE3-NEXT:    psrad $16, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm0
; SSE2-SSSE3-NEXT:    psrad $16, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i16_nsw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i16_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i16_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i16_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

define <8 x i16> @trunc8i64_8i16_nuw(<8 x i64> %a) {
; SSE2-SSSE3-LABEL: trunc8i64_8i16_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm2
; SSE2-SSSE3-NEXT:    psrad $16, %xmm2
; SSE2-SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT:    pslld $16, %xmm0
; SSE2-SSSE3-NEXT:    psrad $16, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i16_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i16_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i16_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i16_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

define void @trunc8i64_8i8_nsw(<8 x i64> %a) {
; SSE2-SSSE3-LABEL: trunc8i64_8i8_nsw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    movq %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i8_nsw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm4 = [255,255]
; SSE41-NEXT:    pand %xmm4, %xmm3
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    packuswb %xmm0, %xmm0
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i8_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqb %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc8i64_8i8_nuw(<8 x i64> %a) {
; SSE2-SSSE3-LABEL: trunc8i64_8i8_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    movq %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i8_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm4 = [255,255]
; SSE41-NEXT:    pand %xmm4, %xmm3
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    packuswb %xmm0, %xmm0
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i8_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovqb %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

define <8 x i16> @trunc8i32_8i16_nsw(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16_nsw:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i16_nsw:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i16_nsw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i16_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i16_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i16_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i16_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i16_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i16_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

define <8 x i16> @trunc8i32_8i16_nuw(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16_nuw:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i16_nuw:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i16_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i16_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i16_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i16_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i16_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i16_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i16_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

define void @trunc8i32_8i8_nsw(<8 x i32> %a) {
; SSE2-SSSE3-LABEL: trunc8i32_8i8_nsw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    movq %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i8_nsw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT:    pand %xmm2, %xmm1
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm0, %xmm0
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i8_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i8_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i8_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i8_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc8i32_8i8_nuw(<8 x i32> %a) {
; SSE2-SSSE3-LABEL: trunc8i32_8i8_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    movq %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i8_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT:    pand %xmm2, %xmm1
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm0, %xmm0
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i8_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i8_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i8_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i8_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc16i32_16i16_nsw(<16 x i32> %a) {
; SSE2-LABEL: trunc16i32_16i16_nsw:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    pslld $16, %xmm3
; SSE2-NEXT:    psrad $16, %xmm3
; SSE2-NEXT:    pslld $16, %xmm2
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    packssdw %xmm3, %xmm2
; SSE2-NEXT:    movdqu %xmm2, (%rax)
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc16i32_16i16_nsw:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm4, %xmm1
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufb %xmm4, %xmm3
; SSSE3-NEXT:    pshufb %xmm4, %xmm2
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSSE3-NEXT:    movdqu %xmm2, (%rax)
; SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i32_16i16_nsw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    movdqu %xmm2, (%rax)
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i16_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i16_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i16_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovdw %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <16 x i32> %a to <16 x i16>
  store <16 x i16> %0, ptr undef, align 4
  ret void
}

define void @trunc16i32_16i16_nuw(<16 x i32> %a) {
; SSE2-LABEL: trunc16i32_16i16_nuw:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    pslld $16, %xmm3
; SSE2-NEXT:    psrad $16, %xmm3
; SSE2-NEXT:    pslld $16, %xmm2
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    packssdw %xmm3, %xmm2
; SSE2-NEXT:    movdqu %xmm2, (%rax)
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc16i32_16i16_nuw:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm4, %xmm1
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufb %xmm4, %xmm3
; SSSE3-NEXT:    pshufb %xmm4, %xmm2
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSSE3-NEXT:    movdqu %xmm2, (%rax)
; SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i32_16i16_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    movdqu %xmm2, (%rax)
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i16_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i16_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15]
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i16_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovdw %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <16 x i32> %a to <16 x i16>
  store <16 x i16> %0, ptr undef, align 4
  ret void
}

define void @trunc16i32_16i8_nsw(<16 x i32> %a) {
; SSE2-SSSE3-LABEL: trunc16i32_16i8_nsw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i32_16i8_nsw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = [255,255,255,255]
; SSE41-NEXT:    pand %xmm4, %xmm3
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm2, %xmm0
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i8_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc16i32_16i8_nuw(<16 x i32> %a) {
; SSE2-SSSE3-LABEL: trunc16i32_16i8_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i32_16i8_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = [255,255,255,255]
; SSE41-NEXT:    pand %xmm4, %xmm3
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packuswb %xmm2, %xmm0
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i8_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc16i16_16i8_nsw(<16 x i16> %a) {
; SSE2-SSSE3-LABEL: trunc16i16_16i8_nsw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i16_16i8_nsw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT:    pand %xmm2, %xmm1
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    packuswb %xmm1, %xmm0
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i16_16i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i16_16i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc16i16_16i8_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc16i16_16i8_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i16_16i8_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %xmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %ymm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <16 x i16> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc16i16_16i8_nuw(<16 x i16> %a) {
; SSE2-SSSE3-LABEL: trunc16i16_16i8_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i16_16i8_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT:    pand %xmm2, %xmm1
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    packuswb %xmm1, %xmm0
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i16_16i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i16_16i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc16i16_16i8_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc16i16_16i8_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i16_16i8_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %xmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %ymm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <16 x i16> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc32i16_32i8_nsw(<32 x i16> %a) {
; SSE2-SSSE3-LABEL: trunc32i16_32i8_nsw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqu %xmm2, (%rax)
; SSE2-SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc32i16_32i8_nsw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    packuswb %xmm1, %xmm0
; SSE41-NEXT:    pand %xmm4, %xmm3
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    packuswb %xmm3, %xmm2
; SSE41-NEXT:    movdqu %xmm2, (%rax)
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc32i16_32i8_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc32i16_32i8_nsw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc32i16_32i8_nsw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc32i16_32i8_nsw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc32i16_32i8_nsw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc32i16_32i8_nsw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nsw <32 x i16> %a to <32 x i8>
  store <32 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc32i16_32i8_nuw(<32 x i16> %a) {
; SSE2-SSSE3-LABEL: trunc32i16_32i8_nuw:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqu %xmm2, (%rax)
; SSE2-SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc32i16_32i8_nuw:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT:    pand %xmm4, %xmm1
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    packuswb %xmm1, %xmm0
; SSE41-NEXT:    pand %xmm4, %xmm3
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    packuswb %xmm3, %xmm2
; SSE41-NEXT:    movdqu %xmm2, (%rax)
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc32i16_32i8_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc32i16_32i8_nuw:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc32i16_32i8_nuw:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc32i16_32i8_nuw:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc32i16_32i8_nuw:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc32i16_32i8_nuw:
; AVX512BWVL:       # %bb.0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BWVL-NEXT:    vzeroupper
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc nuw <32 x i16> %a to <32 x i8>
  store <32 x i8> %0, ptr undef, align 4
  ret void
}

define <8 x i32> @trunc2x4i64_8i32_nsw(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i32_nsw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i32_nsw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nsw:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nsw:
; AVX2-FAST-ALL:       # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT:    retq
;
; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nsw:
; AVX2-FAST-PERLANE:       # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT:    retq
;
; AVX512-LABEL: trunc2x4i64_8i32_nsw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nsw <4 x i64> %a to <4 x i32>
  %1 = trunc nsw <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

define <8 x i32> @trunc2x4i64_8i32_nuw(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i32_nuw:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i32_nuw:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT:    retq
;
; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nuw:
; AVX2-SLOW:       # %bb.0: # %entry
; AVX2-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT:    retq
;
; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nuw:
; AVX2-FAST-ALL:       # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT:    vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT:    vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT:    retq
;
; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nuw:
; AVX2-FAST-PERLANE:       # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT:    retq
;
; AVX512-LABEL: trunc2x4i64_8i32_nuw:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc nuw <4 x i64> %a to <4 x i32>
  %1 = trunc nuw <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

define <8 x i16> @trunc2x4i64_8i16_nsw(<4 x i64> %a, <4 x i64> %b) {
; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nsw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm0
; SSE2-SSSE3-NEXT: psrad $16, %xmm0
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm2
; SSE2-SSSE3-NEXT: psrad $16, %xmm2
; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i64_8i16_nsw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i16_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i16_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i64_8i16_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x4i64_8i16_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x4i64_8i16_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1
; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <4 x i64> %a to <4 x i16>
  %1 = trunc nsw <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
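
; nuw variant of the <4 x i64> -> <4 x i16> concatenated truncation; the
; expected codegen matches the nsw case above.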
define <8 x i16> @trunc2x4i64_8i16_nuw(<4 x i64> %a, <4 x i64> %b) {
; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nuw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm0
; SSE2-SSSE3-NEXT: psrad $16, %xmm0
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm2
; SSE2-SSSE3-NEXT: psrad $16, %xmm2
; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i64_8i16_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i16_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i16_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i64_8i16_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x4i64_8i16_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x4i64_8i16_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1
; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <4 x i64> %a to <4 x i16>
  %1 = trunc nuw <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
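
; Concatenate two <2 x i64> -> <2 x i32> nsw truncations into one <4 x i32>.
; A single shufps suffices, except on the VL targets, which widen to ymm and
; use vpmovqd.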
define <4 x i32> @trunc2x2i64_4i32_nsw(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: trunc2x2i64_4i32_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2x2i64_4i32_nsw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc2x2i64_4i32_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x2i64_4i32_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x2i64_4i32_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <2 x i64> %a to <2 x i32>
  %1 = trunc nsw <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}
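
; nuw variant of the <2 x i64> -> <2 x i32> concatenated truncation; the
; expected codegen matches the nsw case above.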
define <4 x i32> @trunc2x2i64_4i32_nuw(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: trunc2x2i64_4i32_nuw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2x2i64_4i32_nuw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc2x2i64_4i32_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x2i64_4i32_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x2i64_4i32_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <2 x i64> %a to <2 x i32>
  %1 = trunc nuw <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}
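
; Concatenate two <4 x i32> -> <4 x i16> nsw truncations into one <8 x i16>:
; SSE2 shifts and packssdw, SSSE3 uses pshufb, SSE4.1/AVX blend and packusdw,
; and AVX512 widens to ymm and uses vpmovdw.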
define <8 x i16> @trunc2x4i32_8i16_nsw(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16_nsw:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i32_8i16_nsw:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i32_8i16_nsw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x4i32_8i16_nsw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i32_8i16_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x4i32_8i16_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i32_8i16_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x4i32_8i16_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <4 x i32> %a to <4 x i16>
  %1 = trunc nsw <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
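
; nuw variant of the <4 x i32> -> <4 x i16> concatenated truncation; the
; expected codegen matches the nsw case above.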
define <8 x i16> @trunc2x4i32_8i16_nuw(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16_nuw:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i32_8i16_nuw:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i32_8i16_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x4i32_8i16_nuw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i32_8i16_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x4i32_8i16_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i32_8i16_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x4i32_8i16_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <4 x i32> %a to <4 x i16>
  %1 = trunc nuw <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
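
; Concatenate two <16 x i16> -> <16 x i8> nsw truncations into one <32 x i8>.
; Targets without AVX512BW mask the words to 255 and packuswb (AVX2 needs a
; vpermq lane fixup); AVX512BW forms a zmm and truncates with vpmovwb.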
define <32 x i8> @trunc2x16i16_32i8_nsw(<16 x i16> %a, <16 x i16> %b) {
; SSE2-SSSE3-LABEL: trunc2x16i16_32i8_nsw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm4
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x16i16_32i8_nsw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm4, %xmm1
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: pand %xmm4, %xmm3
; SSE41-NEXT: pand %xmm2, %xmm4
; SSE41-NEXT: packuswb %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm4, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x16i16_32i8_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x16i16_32i8_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x16i16_32i8_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x16i16_32i8_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x16i16_32i8_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x16i16_32i8_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <16 x i16> %a to <16 x i8>
  %1 = trunc nsw <16 x i16> %b to <16 x i8>
  %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %2
}
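
; nuw variant of the <16 x i16> -> <16 x i8> concatenated truncation; the
; expected codegen matches the nsw case above.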
define <32 x i8> @trunc2x16i16_32i8_nuw(<16 x i16> %a, <16 x i16> %b) {
; SSE2-SSSE3-LABEL: trunc2x16i16_32i8_nuw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm4
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x16i16_32i8_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm4, %xmm1
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: pand %xmm4, %xmm3
; SSE41-NEXT: pand %xmm2, %xmm4
; SSE41-NEXT: packuswb %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm4, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x16i16_32i8_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x16i16_32i8_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x16i16_32i8_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x16i16_32i8_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x16i16_32i8_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x16i16_32i8_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <16 x i16> %a to <16 x i8>
  %1 = trunc nuw <16 x i16> %b to <16 x i8>
  %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %2
}
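
; Concatenate two <8 x i16> -> <8 x i8> nsw truncations into one <16 x i8>:
; a 255 mask plus packuswb, or vpmovwb on the AVX512BW targets.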
define <16 x i8> @trunc2x8i16_16i8_nsw(<8 x i16> %a, <8 x i16> %b) {
; SSE2-SSSE3-LABEL: trunc2x8i16_16i8_nsw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x8i16_16i8_nsw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x8i16_16i8_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x8i16_16i8_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x8i16_16i8_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x8i16_16i8_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x8i16_16i8_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x8i16_16i8_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <8 x i16> %a to <8 x i8>
  %1 = trunc nsw <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}
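
; nuw variant of the <8 x i16> -> <8 x i8> concatenated truncation; the
; expected codegen matches the nsw case above.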
define <16 x i8> @trunc2x8i16_16i8_nuw(<8 x i16> %a, <8 x i16> %b) {
; SSE2-SSSE3-LABEL: trunc2x8i16_16i8_nuw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x8i16_16i8_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x8i16_16i8_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x8i16_16i8_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x8i16_16i8_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x8i16_16i8_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x8i16_16i8_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x8i16_16i8_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <8 x i16> %a to <8 x i8>
  %1 = trunc nuw <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}
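
; Truncate <8 x i16> to <8 x i8> with nsw and bitcast the result to i64:
; SSE2 masks and packs, SSSE3 and later use a single pshufb (vpmovwb on
; AVX512BWVL), then the low qword is moved to rax.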
define i64 @trunc8i16_i64_nsw(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64_nsw:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i16_i64_nsw:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movq %xmm0, %rax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i16_i64_nsw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc8i16_i64_nsw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc8i16_i64_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc8i16_i64_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc8i16_i64_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc8i16_i64_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovwb %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}
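
; nuw variant of the <8 x i16> -> i64 truncate-and-bitcast test; the expected
; codegen matches the nsw case above.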
define i64 @trunc8i16_i64_nuw(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64_nuw:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i16_i64_nuw:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movq %xmm0, %rax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i16_i64_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc8i16_i64_nuw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc8i16_i64_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc8i16_i64_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc8i16_i64_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc8i16_i64_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovwb %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}