; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; PR6455 'Clear Upper Bits' Patterns
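;
; Each function rebuilds its input vector with the upper half of every
; element cleared; the CHECK lines record the expected lowering for SSE2,
; SSE4.2, AVX1 and AVX2 (ideally a constant and-mask or a blend with zero).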
define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
; SSE2-LABEL: _clearupper2xi64a:
; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-LABEL: _clearupper2xi64a:
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-LABEL: _clearupper2xi64a:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
  %x0 = extractelement <2 x i64> %0, i32 0
  %x1 = extractelement <2 x i64> %0, i32 1
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %ext1, i32 1
define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
; SSE2-LABEL: _clearupper4xi64a:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE42-LABEL: _clearupper4xi64a:
; SSE42-NEXT: xorps %xmm2, %xmm2
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX-LABEL: _clearupper4xi64a:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
  %x0 = extractelement <4 x i64> %0, i32 0
  %x1 = extractelement <4 x i64> %0, i32 1
  %x2 = extractelement <4 x i64> %0, i32 2
  %x3 = extractelement <4 x i64> %0, i32 3
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %trunc2 = trunc i64 %x2 to i32
  %trunc3 = trunc i64 %x3 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %ext2 = zext i32 %trunc2 to i64
  %ext3 = zext i32 %trunc3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %ext1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %ext2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %ext3, i32 3
define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
; SSE2-LABEL: _clearupper4xi32a:
; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-LABEL: _clearupper4xi32a:
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-LABEL: _clearupper4xi32a:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
  %x0 = extractelement <4 x i32> %0, i32 0
  %x1 = extractelement <4 x i32> %0, i32 1
  %x2 = extractelement <4 x i32> %0, i32 2
  %x3 = extractelement <4 x i32> %0, i32 3
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %ext3, i32 3
define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
; SSE2-LABEL: _clearupper8xi32a:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE42-LABEL: _clearupper8xi32a:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX1-LABEL: _clearupper8xi32a:
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-LABEL: _clearupper8xi32a:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
  %x0 = extractelement <8 x i32> %0, i32 0
  %x1 = extractelement <8 x i32> %0, i32 1
  %x2 = extractelement <8 x i32> %0, i32 2
  %x3 = extractelement <8 x i32> %0, i32 3
  %x4 = extractelement <8 x i32> %0, i32 4
  %x5 = extractelement <8 x i32> %0, i32 5
  %x6 = extractelement <8 x i32> %0, i32 6
  %x7 = extractelement <8 x i32> %0, i32 7
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %trunc4 = trunc i32 %x4 to i16
  %trunc5 = trunc i32 %x5 to i16
  %trunc6 = trunc i32 %x6 to i16
  %trunc7 = trunc i32 %x7 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %ext4 = zext i16 %trunc4 to i32
  %ext5 = zext i16 %trunc5 to i32
  %ext6 = zext i16 %trunc6 to i32
  %ext7 = zext i16 %trunc7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %ext3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %ext4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %ext5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %ext6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %ext7, i32 7
define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16a:
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-LABEL: _clearupper8xi16a:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
  %x0 = extractelement <8 x i16> %0, i32 0
  %x1 = extractelement <8 x i16> %0, i32 1
  %x2 = extractelement <8 x i16> %0, i32 2
  %x3 = extractelement <8 x i16> %0, i32 3
  %x4 = extractelement <8 x i16> %0, i32 4
  %x5 = extractelement <8 x i16> %0, i32 5
  %x6 = extractelement <8 x i16> %0, i32 6
  %x7 = extractelement <8 x i16> %0, i32 7
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %ext7, i32 7
define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16a:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; AVX-LABEL: _clearupper16xi16a:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
  %x0 = extractelement <16 x i16> %0, i32 0
  %x1 = extractelement <16 x i16> %0, i32 1
  %x2 = extractelement <16 x i16> %0, i32 2
  %x3 = extractelement <16 x i16> %0, i32 3
  %x4 = extractelement <16 x i16> %0, i32 4
  %x5 = extractelement <16 x i16> %0, i32 5
  %x6 = extractelement <16 x i16> %0, i32 6
  %x7 = extractelement <16 x i16> %0, i32 7
  %x8 = extractelement <16 x i16> %0, i32 8
  %x9 = extractelement <16 x i16> %0, i32 9
  %x10 = extractelement <16 x i16> %0, i32 10
  %x11 = extractelement <16 x i16> %0, i32 11
  %x12 = extractelement <16 x i16> %0, i32 12
  %x13 = extractelement <16 x i16> %0, i32 13
  %x14 = extractelement <16 x i16> %0, i32 14
  %x15 = extractelement <16 x i16> %0, i32 15
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %trunc8 = trunc i16 %x8 to i8
  %trunc9 = trunc i16 %x9 to i8
  %trunc10 = trunc i16 %x10 to i8
  %trunc11 = trunc i16 %x11 to i8
  %trunc12 = trunc i16 %x12 to i8
  %trunc13 = trunc i16 %x13 to i8
  %trunc14 = trunc i16 %x14 to i8
  %trunc15 = trunc i16 %x15 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %ext8 = zext i8 %trunc8 to i16
  %ext9 = zext i8 %trunc9 to i16
  %ext10 = zext i8 %trunc10 to i16
  %ext11 = zext i8 %trunc11 to i16
  %ext12 = zext i8 %trunc12 to i16
  %ext13 = zext i8 %trunc13 to i16
  %ext14 = zext i8 %trunc14 to i16
  %ext15 = zext i8 %trunc15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %ext7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %ext8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %ext9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %ext10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %ext11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %ext12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %ext13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %ext14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %ext15, i32 15
define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8a:
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-LABEL: _clearupper16xi8a:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
  %x0 = extractelement <16 x i8> %0, i32 0
  %x1 = extractelement <16 x i8> %0, i32 1
  %x2 = extractelement <16 x i8> %0, i32 2
  %x3 = extractelement <16 x i8> %0, i32 3
  %x4 = extractelement <16 x i8> %0, i32 4
  %x5 = extractelement <16 x i8> %0, i32 5
  %x6 = extractelement <16 x i8> %0, i32 6
  %x7 = extractelement <16 x i8> %0, i32 7
  %x8 = extractelement <16 x i8> %0, i32 8
  %x9 = extractelement <16 x i8> %0, i32 9
  %x10 = extractelement <16 x i8> %0, i32 10
  %x11 = extractelement <16 x i8> %0, i32 11
  %x12 = extractelement <16 x i8> %0, i32 12
  %x13 = extractelement <16 x i8> %0, i32 13
  %x14 = extractelement <16 x i8> %0, i32 14
  %x15 = extractelement <16 x i8> %0, i32 15
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %v0 = insertelement <16 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <16 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <16 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <16 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <16 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <16 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <16 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <16 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <16 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <16 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <16 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <16 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <16 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <16 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <16 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <16 x i8> %v14, i8 %ext15, i32 15
define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8a:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; AVX-LABEL: _clearupper32xi8a:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
  %x0 = extractelement <32 x i8> %0, i32 0
  %x1 = extractelement <32 x i8> %0, i32 1
  %x2 = extractelement <32 x i8> %0, i32 2
  %x3 = extractelement <32 x i8> %0, i32 3
  %x4 = extractelement <32 x i8> %0, i32 4
  %x5 = extractelement <32 x i8> %0, i32 5
  %x6 = extractelement <32 x i8> %0, i32 6
  %x7 = extractelement <32 x i8> %0, i32 7
  %x8 = extractelement <32 x i8> %0, i32 8
  %x9 = extractelement <32 x i8> %0, i32 9
  %x10 = extractelement <32 x i8> %0, i32 10
  %x11 = extractelement <32 x i8> %0, i32 11
  %x12 = extractelement <32 x i8> %0, i32 12
  %x13 = extractelement <32 x i8> %0, i32 13
  %x14 = extractelement <32 x i8> %0, i32 14
  %x15 = extractelement <32 x i8> %0, i32 15
  %x16 = extractelement <32 x i8> %0, i32 16
  %x17 = extractelement <32 x i8> %0, i32 17
  %x18 = extractelement <32 x i8> %0, i32 18
  %x19 = extractelement <32 x i8> %0, i32 19
  %x20 = extractelement <32 x i8> %0, i32 20
  %x21 = extractelement <32 x i8> %0, i32 21
  %x22 = extractelement <32 x i8> %0, i32 22
  %x23 = extractelement <32 x i8> %0, i32 23
  %x24 = extractelement <32 x i8> %0, i32 24
  %x25 = extractelement <32 x i8> %0, i32 25
  %x26 = extractelement <32 x i8> %0, i32 26
  %x27 = extractelement <32 x i8> %0, i32 27
  %x28 = extractelement <32 x i8> %0, i32 28
  %x29 = extractelement <32 x i8> %0, i32 29
  %x30 = extractelement <32 x i8> %0, i32 30
  %x31 = extractelement <32 x i8> %0, i32 31
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %trunc16 = trunc i8 %x16 to i4
  %trunc17 = trunc i8 %x17 to i4
  %trunc18 = trunc i8 %x18 to i4
  %trunc19 = trunc i8 %x19 to i4
  %trunc20 = trunc i8 %x20 to i4
  %trunc21 = trunc i8 %x21 to i4
  %trunc22 = trunc i8 %x22 to i4
  %trunc23 = trunc i8 %x23 to i4
  %trunc24 = trunc i8 %x24 to i4
  %trunc25 = trunc i8 %x25 to i4
  %trunc26 = trunc i8 %x26 to i4
  %trunc27 = trunc i8 %x27 to i4
  %trunc28 = trunc i8 %x28 to i4
  %trunc29 = trunc i8 %x29 to i4
  %trunc30 = trunc i8 %x30 to i4
  %trunc31 = trunc i8 %x31 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %ext16 = zext i4 %trunc16 to i8
  %ext17 = zext i4 %trunc17 to i8
  %ext18 = zext i4 %trunc18 to i8
  %ext19 = zext i4 %trunc19 to i8
  %ext20 = zext i4 %trunc20 to i8
  %ext21 = zext i4 %trunc21 to i8
  %ext22 = zext i4 %trunc22 to i8
  %ext23 = zext i4 %trunc23 to i8
  %ext24 = zext i4 %trunc24 to i8
  %ext25 = zext i4 %trunc25 to i8
  %ext26 = zext i4 %trunc26 to i8
  %ext27 = zext i4 %trunc27 to i8
  %ext28 = zext i4 %trunc28 to i8
  %ext29 = zext i4 %trunc29 to i8
  %ext30 = zext i4 %trunc30 to i8
  %ext31 = zext i4 %trunc31 to i8
  %v0 = insertelement <32 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <32 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <32 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <32 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <32 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <32 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <32 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <32 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <32 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <32 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <32 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <32 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <32 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <32 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <32 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <32 x i8> %v14, i8 %ext15, i32 15
  %v16 = insertelement <32 x i8> %v15, i8 %ext16, i32 16
  %v17 = insertelement <32 x i8> %v16, i8 %ext17, i32 17
  %v18 = insertelement <32 x i8> %v17, i8 %ext18, i32 18
  %v19 = insertelement <32 x i8> %v18, i8 %ext19, i32 19
  %v20 = insertelement <32 x i8> %v19, i8 %ext20, i32 20
  %v21 = insertelement <32 x i8> %v20, i8 %ext21, i32 21
  %v22 = insertelement <32 x i8> %v21, i8 %ext22, i32 22
  %v23 = insertelement <32 x i8> %v22, i8 %ext23, i32 23
  %v24 = insertelement <32 x i8> %v23, i8 %ext24, i32 24
  %v25 = insertelement <32 x i8> %v24, i8 %ext25, i32 25
  %v26 = insertelement <32 x i8> %v25, i8 %ext26, i32 26
  %v27 = insertelement <32 x i8> %v26, i8 %ext27, i32 27
  %v28 = insertelement <32 x i8> %v27, i8 %ext28, i32 28
  %v29 = insertelement <32 x i8> %v28, i8 %ext29, i32 29
  %v30 = insertelement <32 x i8> %v29, i8 %ext30, i32 30
  %v31 = insertelement <32 x i8> %v30, i8 %ext31, i32 31
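;
; The following variants clear the upper bits by inserting zero elements
; into a bitcast of the input vector.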
define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
; SSE2-LABEL: _clearupper2xi64b:
; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-LABEL: _clearupper2xi64b:
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-LABEL: _clearupper2xi64b:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
  %x32 = bitcast <2 x i64> %0 to <4 x i32>
  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3
  %r = bitcast <4 x i32> %r1 to <2 x i64>
define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind {
; SSE2-LABEL: _clearupper4xi64b:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [NaN,0.0E+0,NaN,0.0E+0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE42-LABEL: _clearupper4xi64b:
; SSE42-NEXT: xorps %xmm2, %xmm2
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX-LABEL: _clearupper4xi64b:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
  %x32 = bitcast <4 x i64> %0 to <8 x i32>
  %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <8 x i32> %r0, i32 zeroinitializer, i32 3
  %r2 = insertelement <8 x i32> %r1, i32 zeroinitializer, i32 5
  %r3 = insertelement <8 x i32> %r2, i32 zeroinitializer, i32 7
  %r = bitcast <8 x i32> %r3 to <4 x i64>
define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE2-LABEL: _clearupper4xi32b:
; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-LABEL: _clearupper4xi32b:
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-LABEL: _clearupper4xi32b:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
  %x16 = bitcast <4 x i32> %0 to <8 x i16>
  %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <8 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <8 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <8 x i16> %r2, i16 zeroinitializer, i32 7
  %r = bitcast <8 x i16> %r3 to <4 x i32>
define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind {
; SSE2-LABEL: _clearupper8xi32b:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE42-LABEL: _clearupper8xi32b:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX1-LABEL: _clearupper8xi32b:
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-LABEL: _clearupper8xi32b:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
  %x16 = bitcast <8 x i32> %0 to <16 x i16>
  %r0 = insertelement <16 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <16 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <16 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <16 x i16> %r2, i16 zeroinitializer, i32 7
  %r4 = insertelement <16 x i16> %r3, i16 zeroinitializer, i32 9
  %r5 = insertelement <16 x i16> %r4, i16 zeroinitializer, i32 11
  %r6 = insertelement <16 x i16> %r5, i16 zeroinitializer, i32 13
  %r7 = insertelement <16 x i16> %r6, i16 zeroinitializer, i32 15
  %r = bitcast <16 x i16> %r7 to <8 x i32>
define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16b:
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-LABEL: _clearupper8xi16b:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
  %x8 = bitcast <8 x i16> %0 to <16 x i8>
  %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15
  %r = bitcast <16 x i8> %r7 to <8 x i16>
define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16b:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; AVX-LABEL: _clearupper16xi16b:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
  %x8 = bitcast <16 x i16> %0 to <32 x i8>
  %r0 = insertelement <32 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <32 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <32 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <32 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <32 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <32 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <32 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <32 x i8> %r6, i8 zeroinitializer, i32 15
  %r8 = insertelement <32 x i8> %r7, i8 zeroinitializer, i32 17
  %r9 = insertelement <32 x i8> %r8, i8 zeroinitializer, i32 19
  %r10 = insertelement <32 x i8> %r9, i8 zeroinitializer, i32 21
  %r11 = insertelement <32 x i8> %r10, i8 zeroinitializer, i32 23
  %r12 = insertelement <32 x i8> %r11, i8 zeroinitializer, i32 25
  %r13 = insertelement <32 x i8> %r12, i8 zeroinitializer, i32 27
  %r14 = insertelement <32 x i8> %r13, i8 zeroinitializer, i32 29
  %r15 = insertelement <32 x i8> %r14, i8 zeroinitializer, i32 31
  %r = bitcast <32 x i8> %r15 to <16 x i16>
define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; SSE2-LABEL: _clearupper16xi8b:
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm1, %r10
; SSE2-NEXT: movq %r10, %r8
; SSE2-NEXT: shrq $56, %r8
; SSE2-NEXT: andl $15, %r8d
; SSE2-NEXT: movq %r10, %r9
; SSE2-NEXT: shrq $48, %r9
; SSE2-NEXT: andl $15, %r9d
; SSE2-NEXT: movq %r10, %rsi
; SSE2-NEXT: shrq $40, %rsi
; SSE2-NEXT: andl $15, %esi
; SSE2-NEXT: movq %r10, %r11
; SSE2-NEXT: shrq $32, %r11
; SSE2-NEXT: andl $15, %r11d
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: shrq $56, %rdx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq $48, %rcx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movq %rax, %rdi
; SSE2-NEXT: shrq $40, %rdi
; SSE2-NEXT: andl $15, %edi
; SSE2-NEXT: movq %rax, %rbx
; SSE2-NEXT: shrq $32, %rbx
; SSE2-NEXT: andl $15, %ebx
; SSE2-NEXT: shlq $32, %rbx
; SSE2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; SSE2-NEXT: orq %rbx, %rax
; SSE2-NEXT: shlq $40, %rdi
; SSE2-NEXT: orq %rax, %rdi
; SSE2-NEXT: shlq $48, %rcx
; SSE2-NEXT: orq %rdi, %rcx
; SSE2-NEXT: shlq $56, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: shlq $32, %r11
; SSE2-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE2-NEXT: orq %r11, %r10
; SSE2-NEXT: shlq $40, %rsi
; SSE2-NEXT: orq %r10, %rsi
; SSE2-NEXT: shlq $48, %r9
; SSE2-NEXT: orq %rsi, %r9
; SSE2-NEXT: shlq $56, %r8
; SSE2-NEXT: orq %r9, %r8
; SSE2-NEXT: movq %rdx, %xmm0
; SSE2-NEXT: movq %r8, %xmm1
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: popq %rbx
; SSE42-LABEL: _clearupper16xi8b:
; SSE42-NEXT: pushq %rbx
; SSE42-NEXT: pextrq $1, %xmm0, %r10
; SSE42-NEXT: movq %r10, %r8
; SSE42-NEXT: shrq $56, %r8
; SSE42-NEXT: andl $15, %r8d
; SSE42-NEXT: movq %r10, %r9
; SSE42-NEXT: shrq $48, %r9
; SSE42-NEXT: andl $15, %r9d
; SSE42-NEXT: movq %r10, %rsi
; SSE42-NEXT: shrq $40, %rsi
; SSE42-NEXT: andl $15, %esi
; SSE42-NEXT: movq %r10, %r11
; SSE42-NEXT: shrq $32, %r11
; SSE42-NEXT: andl $15, %r11d
; SSE42-NEXT: movq %xmm0, %rax
; SSE42-NEXT: movq %rax, %rdx
; SSE42-NEXT: shrq $56, %rdx
; SSE42-NEXT: andl $15, %edx
; SSE42-NEXT: movq %rax, %rcx
; SSE42-NEXT: shrq $48, %rcx
; SSE42-NEXT: andl $15, %ecx
; SSE42-NEXT: movq %rax, %rdi
; SSE42-NEXT: shrq $40, %rdi
; SSE42-NEXT: andl $15, %edi
; SSE42-NEXT: movq %rax, %rbx
; SSE42-NEXT: shrq $32, %rbx
; SSE42-NEXT: andl $15, %ebx
; SSE42-NEXT: shlq $32, %rbx
; SSE42-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; SSE42-NEXT: orq %rbx, %rax
; SSE42-NEXT: shlq $40, %rdi
; SSE42-NEXT: orq %rax, %rdi
; SSE42-NEXT: shlq $48, %rcx
; SSE42-NEXT: orq %rdi, %rcx
; SSE42-NEXT: shlq $56, %rdx
; SSE42-NEXT: orq %rcx, %rdx
; SSE42-NEXT: shlq $32, %r11
; SSE42-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE42-NEXT: orq %r11, %r10
; SSE42-NEXT: shlq $40, %rsi
; SSE42-NEXT: orq %r10, %rsi
; SSE42-NEXT: shlq $48, %r9
; SSE42-NEXT: orq %rsi, %r9
; SSE42-NEXT: shlq $56, %r8
; SSE42-NEXT: orq %r9, %r8
; SSE42-NEXT: movq %r8, %xmm1
; SSE42-NEXT: movq %rdx, %xmm0
; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE42-NEXT: popq %rbx
; AVX-LABEL: _clearupper16xi8b:
; AVX-NEXT: pushq %rbx
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %r9
; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: movq %r9, %r8
; AVX-NEXT: shrq $56, %r8
; AVX-NEXT: andl $15, %r8d
; AVX-NEXT: movq %r9, %r10
; AVX-NEXT: shrq $48, %r10
; AVX-NEXT: andl $15, %r10d
; AVX-NEXT: movq %rcx, %rdx
; AVX-NEXT: shldq $24, %r9, %rdx
; AVX-NEXT: andl $15, %edx
; AVX-NEXT: movq %r9, %r11
; AVX-NEXT: shrq $32, %r11
; AVX-NEXT: andl $15, %r11d
; AVX-NEXT: movq %rcx, %rdi
; AVX-NEXT: shrq $56, %rdi
; AVX-NEXT: andl $15, %edi
; AVX-NEXT: movq %rcx, %rsi
; AVX-NEXT: shrq $48, %rsi
; AVX-NEXT: andl $15, %esi
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: shrq $40, %rax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: movq %rcx, %rbx
; AVX-NEXT: shrq $32, %rbx
; AVX-NEXT: andl $15, %ebx
; AVX-NEXT: shlq $32, %rbx
; AVX-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; AVX-NEXT: orq %rbx, %rcx
; AVX-NEXT: shlq $40, %rax
; AVX-NEXT: orq %rcx, %rax
; AVX-NEXT: shlq $48, %rsi
; AVX-NEXT: orq %rax, %rsi
; AVX-NEXT: shlq $56, %rdi
; AVX-NEXT: orq %rsi, %rdi
; AVX-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shlq $32, %r11
; AVX-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F
; AVX-NEXT: orq %r11, %r9
; AVX-NEXT: shlq $40, %rdx
; AVX-NEXT: orq %r9, %rdx
; AVX-NEXT: shlq $48, %r10
; AVX-NEXT: orq %rdx, %r10
; AVX-NEXT: shlq $56, %r8
; AVX-NEXT: orq %r10, %r8
; AVX-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT: popq %rbx
  %x4 = bitcast <16 x i8> %0 to <32 x i4>
  %r0 = insertelement <32 x i4> %x4, i4 zeroinitializer, i32 1
  %r1 = insertelement <32 x i4> %r0, i4 zeroinitializer, i32 3
  %r2 = insertelement <32 x i4> %r1, i4 zeroinitializer, i32 5
  %r3 = insertelement <32 x i4> %r2, i4 zeroinitializer, i32 7
  %r4 = insertelement <32 x i4> %r3, i4 zeroinitializer, i32 9
  %r5 = insertelement <32 x i4> %r4, i4 zeroinitializer, i32 11
  %r6 = insertelement <32 x i4> %r5, i4 zeroinitializer, i32 13
  %r7 = insertelement <32 x i4> %r6, i4 zeroinitializer, i32 15
  %r8 = insertelement <32 x i4> %r7, i4 zeroinitializer, i32 17
  %r9 = insertelement <32 x i4> %r8, i4 zeroinitializer, i32 19
  %r10 = insertelement <32 x i4> %r9, i4 zeroinitializer, i32 21
  %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31
  %r = bitcast <32 x i4> %r15 to <16 x i8>
define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; SSE2-LABEL: _clearupper32xi8b:
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm2, %r10
; SSE2-NEXT: movq %r10, %r8
; SSE2-NEXT: shrq $56, %r8
; SSE2-NEXT: andl $15, %r8d
; SSE2-NEXT: movq %r10, %r9
; SSE2-NEXT: shrq $48, %r9
; SSE2-NEXT: andl $15, %r9d
; SSE2-NEXT: movq %r10, %rsi
; SSE2-NEXT: shrq $40, %rsi
; SSE2-NEXT: andl $15, %esi
; SSE2-NEXT: movq %r10, %r11
; SSE2-NEXT: shrq $32, %r11
; SSE2-NEXT: andl $15, %r11d
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: shrq $56, %rdx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq $48, %rcx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movq %rax, %rdi
; SSE2-NEXT: shrq $40, %rdi
; SSE2-NEXT: andl $15, %edi
; SSE2-NEXT: movq %rax, %rbx
; SSE2-NEXT: shrq $32, %rbx
; SSE2-NEXT: andl $15, %ebx
; SSE2-NEXT: shlq $32, %rbx
; SSE2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; SSE2-NEXT: orq %rbx, %rax
; SSE2-NEXT: shlq $40, %rdi
; SSE2-NEXT: orq %rax, %rdi
; SSE2-NEXT: shlq $48, %rcx
; SSE2-NEXT: orq %rdi, %rcx
; SSE2-NEXT: shlq $56, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: shlq $32, %r11
; SSE2-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE2-NEXT: orq %r11, %r10
; SSE2-NEXT: shlq $40, %rsi
; SSE2-NEXT: orq %r10, %rsi
; SSE2-NEXT: shlq $48, %r9
; SSE2-NEXT: orq %rsi, %r9
; SSE2-NEXT: shlq $56, %r8
; SSE2-NEXT: orq %r9, %r8
; SSE2-NEXT: movq %rdx, %xmm0
; SSE2-NEXT: movq %r8, %xmm2
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: popq %rbx
; SSE42-LABEL: _clearupper32xi8b:
; SSE42-NEXT: pushq %rbx
; SSE42-NEXT: pextrq $1, %xmm0, %r10
; SSE42-NEXT: movq %r10, %r8
; SSE42-NEXT: shrq $56, %r8
; SSE42-NEXT: andl $15, %r8d
; SSE42-NEXT: movq %r10, %r9
; SSE42-NEXT: shrq $48, %r9
; SSE42-NEXT: andl $15, %r9d
; SSE42-NEXT: movq %r10, %rsi
; SSE42-NEXT: shrq $40, %rsi
; SSE42-NEXT: andl $15, %esi
; SSE42-NEXT: movq %r10, %r11
; SSE42-NEXT: shrq $32, %r11
; SSE42-NEXT: andl $15, %r11d
; SSE42-NEXT: movq %xmm0, %rax
; SSE42-NEXT: movq %rax, %rdx
; SSE42-NEXT: shrq $56, %rdx
; SSE42-NEXT: andl $15, %edx
; SSE42-NEXT: movq %rax, %rcx
; SSE42-NEXT: shrq $48, %rcx
; SSE42-NEXT: andl $15, %ecx
; SSE42-NEXT: movq %rax, %rdi
; SSE42-NEXT: shrq $40, %rdi
; SSE42-NEXT: andl $15, %edi
; SSE42-NEXT: movq %rax, %rbx
; SSE42-NEXT: shrq $32, %rbx
; SSE42-NEXT: andl $15, %ebx
; SSE42-NEXT: shlq $32, %rbx
; SSE42-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; SSE42-NEXT: orq %rbx, %rax
; SSE42-NEXT: shlq $40, %rdi
; SSE42-NEXT: orq %rax, %rdi
; SSE42-NEXT: shlq $48, %rcx
; SSE42-NEXT: orq %rdi, %rcx
; SSE42-NEXT: shlq $56, %rdx
; SSE42-NEXT: orq %rcx, %rdx
; SSE42-NEXT: shlq $32, %r11
; SSE42-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE42-NEXT: orq %r11, %r10
; SSE42-NEXT: shlq $40, %rsi
; SSE42-NEXT: orq %r10, %rsi
; SSE42-NEXT: shlq $48, %r9
; SSE42-NEXT: orq %rsi, %r9
; SSE42-NEXT: shlq $56, %r8
; SSE42-NEXT: orq %r9, %r8
; SSE42-NEXT: movq %r8, %xmm2
; SSE42-NEXT: movq %rdx, %xmm0
; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE42-NEXT: popq %rbx
; AVX1-LABEL: _clearupper32xi8b:
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX1-NEXT: movq %rax, %r8
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: movq %rax, %rdi
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $32, %rcx
; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; AVX1-NEXT: orq %rcx, %rax
; AVX1-NEXT: shrq $40, %rdi
; AVX1-NEXT: andl $15, %edi
; AVX1-NEXT: shlq $40, %rdi
; AVX1-NEXT: orq %rax, %rdi
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX1-NEXT: shrq $48, %rsi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shlq $48, %rsi
; AVX1-NEXT: orq %rdi, %rsi
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $56, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shlq $56, %rdx
; AVX1-NEXT: orq %rsi, %rdx
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: shldq $24, %rax, %r8
; AVX1-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: shrq $32, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; AVX1-NEXT: orq %rdx, %rax
; AVX1-NEXT: andl $15, %r8d
; AVX1-NEXT: shlq $40, %r8
; AVX1-NEXT: orq %rax, %r8
; AVX1-NEXT: shrq $48, %rsi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shlq $48, %rsi
; AVX1-NEXT: orq %r8, %rsi
; AVX1-NEXT: shrq $56, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: shlq $56, %rcx
; AVX1-NEXT: orq %rsi, %rcx
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $8, %ecx
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $16, %ecx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $24, %ecx
; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $40, %rcx
; AVX1-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $48, %rcx
; AVX1-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
; AVX1-NEXT: shrq $56, %rax
; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $16, %eax
; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $24, %eax
; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $32, %rax
; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $40, %rax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $48, %rax
; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT: shrq $56, %rcx
; AVX1-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; AVX1-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-LABEL: _clearupper32xi8b:
; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: movq %rax, %rsi
; AVX2-NEXT: movq %rax, %rdi
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: shlq $32, %rcx
; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; AVX2-NEXT: orq %rcx, %rax
; AVX2-NEXT: shrq $40, %rdi
; AVX2-NEXT: andl $15, %edi
; AVX2-NEXT: shlq $40, %rdi
; AVX2-NEXT: orq %rax, %rdi
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX2-NEXT: shrq $48, %rsi
; AVX2-NEXT: andl $15, %esi
; AVX2-NEXT: shlq $48, %rsi
; AVX2-NEXT: orq %rdi, %rsi
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $56, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $56, %rdx
; AVX2-NEXT: orq %rsi, %rdx
; AVX2-NEXT: movq %rax, %rsi
; AVX2-NEXT: shldq $24, %rax, %r8
; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: shrq $32, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $32, %rdx
; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; AVX2-NEXT: orq %rdx, %rax
; AVX2-NEXT: andl $15, %r8d
; AVX2-NEXT: shlq $40, %r8
; AVX2-NEXT: orq %rax, %r8
; AVX2-NEXT: shrq $48, %rsi
; AVX2-NEXT: andl $15, %esi
; AVX2-NEXT: shlq $48, %rsi
; AVX2-NEXT: orq %r8, %rsi
; AVX2-NEXT: shrq $56, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: shlq $56, %rcx
; AVX2-NEXT: orq %rsi, %rcx
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $8, %ecx
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $24, %ecx
; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $40, %rcx
; AVX2-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $48, %rcx
; AVX2-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
; AVX2-NEXT: shrq $56, %rax
; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $24, %eax
; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $32, %rax
; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $40, %rax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $48, %rax
; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT: shrq $56, %rcx
; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
  %x4 = bitcast <32 x i8> %0 to <64 x i4>
  %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1
  %r1 = insertelement <64 x i4> %r0, i4 zeroinitializer, i32 3
  %r2 = insertelement <64 x i4> %r1, i4 zeroinitializer, i32 5
  %r3 = insertelement <64 x i4> %r2, i4 zeroinitializer, i32 7
  %r4 = insertelement <64 x i4> %r3, i4 zeroinitializer, i32 9
  %r5 = insertelement <64 x i4> %r4, i4 zeroinitializer, i32 11
  %r6 = insertelement <64 x i4> %r5, i4 zeroinitializer, i32 13
  %r7 = insertelement <64 x i4> %r6, i4 zeroinitializer, i32 15
  %r8 = insertelement <64 x i4> %r7, i4 zeroinitializer, i32 17
  %r9 = insertelement <64 x i4> %r8, i4 zeroinitializer, i32 19
  %r10 = insertelement <64 x i4> %r9, i4 zeroinitializer, i32 21
  %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31
  %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33
  %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35
  %r18 = insertelement <64 x i4> %r17, i4 zeroinitializer, i32 37
  %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39
  %r20 = insertelement <64 x i4> %r19, i4 zeroinitializer, i32 41
  %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43
  %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45
  %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47
  %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49
  %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51
  %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53
  %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55
  %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57
  %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59
  %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61
  %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63
  %r = bitcast <64 x i4> %r15 to <32 x i8>
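;
; The following variants clear the upper bits with an explicit and-mask.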
define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind {
; SSE2-LABEL: _clearupper2xi64c:
; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-LABEL: _clearupper2xi64c:
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-LABEL: _clearupper2xi64c:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
  %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind {
; SSE2-LABEL: _clearupper4xi64c:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE42-LABEL: _clearupper4xi64c:
; SSE42-NEXT: xorps %xmm2, %xmm2
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX-LABEL: _clearupper4xi64c:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
  %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0
define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind {
; SSE2-LABEL: _clearupper4xi32c:
; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE42-LABEL: _clearupper4xi32c:
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-LABEL: _clearupper4xi32c:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
  %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0
define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind {
; SSE2-LABEL: _clearupper8xi32c:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE42-LABEL: _clearupper8xi32c:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX1-LABEL: _clearupper8xi32c:
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-LABEL: _clearupper8xi32c:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
  %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0
define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16c:
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-LABEL: _clearupper8xi16c:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
  %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16c:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; AVX-LABEL: _clearupper16xi16c:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
  %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8c:
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; AVX-LABEL: _clearupper16xi8c:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
  %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8c:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; AVX-LABEL: _clearupper32xi8c:
; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
  %r = and <32 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0