1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
3 ; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2
4 ; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
; Declarations of the llvm.sshl.sat intrinsic (signed saturating shift left, per
; the LLVM Language Reference) for each vector type called by the test
; functions in this file. Every overload takes two operands of the same
; 128-bit vector type and returns that type.
6 declare <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64>, <2 x i64>)
7 declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>)
8 declare <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16>, <8 x i16>)
9 declare <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8>, <16 x i8>)
; vec_v2i64: checks the code generated for @llvm.sshl.sat.v2i64(%x, %y).
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
;   X64      - x86_64-linux with default features: psllq/psrlq shifts per
;              64-bit lane, then pand/pandn/por to select the final value.
;   X64-AVX2 - x86_64-linux with +avx2: variable shifts (vpsllvq/vpsrlvq), a
;              vpcmpeqq against the original %x, and vblendvpd selects between
;              the shifted value and the 9223372036854775808 /
;              9223372036854775807 constants.
;   X86      - i686 with cmov: scalar shll/shldl/sarl/shrdl sequences with
;              cmov, spilling to the stack and storing the four 32-bit result
;              words through a pointer loaded from the stack.
11 define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
12 ; X64-LABEL: vec_v2i64:
14 ; X64-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
15 ; X64-NEXT: movdqa %xmm2, %xmm3
16 ; X64-NEXT: psrlq %xmm1, %xmm3
17 ; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
18 ; X64-NEXT: movdqa %xmm2, %xmm5
19 ; X64-NEXT: psrlq %xmm4, %xmm5
20 ; X64-NEXT: movsd {{.*#+}} xmm5 = xmm3[0],xmm5[1]
21 ; X64-NEXT: movdqa %xmm0, %xmm6
22 ; X64-NEXT: psllq %xmm1, %xmm6
23 ; X64-NEXT: movdqa %xmm0, %xmm3
24 ; X64-NEXT: psllq %xmm4, %xmm3
25 ; X64-NEXT: movdqa %xmm3, %xmm7
26 ; X64-NEXT: movsd {{.*#+}} xmm3 = xmm6[0],xmm3[1]
27 ; X64-NEXT: psrlq %xmm1, %xmm6
28 ; X64-NEXT: psrlq %xmm4, %xmm7
29 ; X64-NEXT: movsd {{.*#+}} xmm7 = xmm6[0],xmm7[1]
30 ; X64-NEXT: xorpd %xmm5, %xmm7
31 ; X64-NEXT: psubq %xmm5, %xmm7
32 ; X64-NEXT: pcmpeqd %xmm0, %xmm7
33 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,0,3,2]
34 ; X64-NEXT: pand %xmm7, %xmm1
35 ; X64-NEXT: andpd %xmm1, %xmm3
36 ; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
37 ; X64-NEXT: pand %xmm2, %xmm0
38 ; X64-NEXT: pxor %xmm5, %xmm5
39 ; X64-NEXT: pcmpgtd %xmm4, %xmm5
40 ; X64-NEXT: por %xmm2, %xmm5
41 ; X64-NEXT: pcmpeqd %xmm2, %xmm2
42 ; X64-NEXT: pxor %xmm5, %xmm2
43 ; X64-NEXT: por %xmm0, %xmm2
44 ; X64-NEXT: pandn %xmm2, %xmm1
45 ; X64-NEXT: por %xmm3, %xmm1
46 ; X64-NEXT: movdqa %xmm1, %xmm0
49 ; X64-AVX2-LABEL: vec_v2i64:
51 ; X64-AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
52 ; X64-AVX2-NEXT: # xmm2 = mem[0,0]
53 ; X64-AVX2-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775807,9223372036854775807]
54 ; X64-AVX2-NEXT: # xmm3 = mem[0,0]
55 ; X64-AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm3
56 ; X64-AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
57 ; X64-AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm4
58 ; X64-AVX2-NEXT: vpsrlvq %xmm1, %xmm4, %xmm1
59 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
60 ; X64-AVX2-NEXT: vpsubq %xmm2, %xmm1, %xmm1
61 ; X64-AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
62 ; X64-AVX2-NEXT: vblendvpd %xmm0, %xmm4, %xmm3, %xmm0
65 ; X86-LABEL: vec_v2i64:
67 ; X86-NEXT: pushl %ebp
68 ; X86-NEXT: pushl %ebx
69 ; X86-NEXT: pushl %edi
70 ; X86-NEXT: pushl %esi
71 ; X86-NEXT: subl $20, %esp
72 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
73 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
74 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
75 ; X86-NEXT: movl %edx, %eax
76 ; X86-NEXT: shll %cl, %eax
77 ; X86-NEXT: shldl %cl, %edx, %edi
78 ; X86-NEXT: xorl %edx, %edx
79 ; X86-NEXT: testb $32, %cl
80 ; X86-NEXT: cmovnel %eax, %edi
81 ; X86-NEXT: cmovnel %edx, %eax
82 ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
83 ; X86-NEXT: movl %edi, %ebx
84 ; X86-NEXT: sarl %cl, %ebx
85 ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
86 ; X86-NEXT: movl %edi, %eax
87 ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
88 ; X86-NEXT: sarl $31, %eax
89 ; X86-NEXT: testb $32, %cl
90 ; X86-NEXT: cmovel %ebx, %eax
91 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
92 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
93 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
94 ; X86-NEXT: movl %esi, %eax
95 ; X86-NEXT: movb %ch, %cl
96 ; X86-NEXT: shll %cl, %eax
97 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
98 ; X86-NEXT: movl %ebp, %ebx
99 ; X86-NEXT: shldl %cl, %esi, %ebx
100 ; X86-NEXT: testb $32, %ch
101 ; X86-NEXT: cmovnel %eax, %ebx
102 ; X86-NEXT: cmovnel %edx, %eax
103 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
104 ; X86-NEXT: movl %ebx, %esi
105 ; X86-NEXT: sarl %cl, %esi
106 ; X86-NEXT: movl %ebx, %edx
107 ; X86-NEXT: sarl $31, %edx
108 ; X86-NEXT: testb $32, %ch
109 ; X86-NEXT: cmovel %esi, %edx
110 ; X86-NEXT: movl (%esp), %eax # 4-byte Reload
111 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
112 ; X86-NEXT: shrdl %cl, %edi, %eax
113 ; X86-NEXT: testb $32, %cl
114 ; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
115 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
116 ; X86-NEXT: movb %ch, %cl
117 ; X86-NEXT: shrdl %cl, %ebx, %edi
118 ; X86-NEXT: testb $32, %ch
119 ; X86-NEXT: cmovnel %esi, %edi
120 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
121 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
122 ; X86-NEXT: xorl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
123 ; X86-NEXT: sarl $31, %esi
124 ; X86-NEXT: movl %esi, %ecx
125 ; X86-NEXT: xorl $2147483647, %ecx # imm = 0x7FFFFFFF
126 ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
127 ; X86-NEXT: notl %esi
128 ; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload
129 ; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
130 ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
131 ; X86-NEXT: xorl {{[0-9]+}}(%esp), %edi
132 ; X86-NEXT: xorl %ebp, %edx
133 ; X86-NEXT: sarl $31, %ebp
134 ; X86-NEXT: movl %ebp, %esi
135 ; X86-NEXT: xorl $2147483647, %esi # imm = 0x7FFFFFFF
136 ; X86-NEXT: orl %edx, %edi
137 ; X86-NEXT: notl %ebp
138 ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
139 ; X86-NEXT: cmovel %ebx, %esi
140 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
141 ; X86-NEXT: movl %esi, 12(%eax)
142 ; X86-NEXT: movl %ebp, 8(%eax)
143 ; X86-NEXT: movl %ecx, 4(%eax)
144 ; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
145 ; X86-NEXT: movl %ecx, (%eax)
146 ; X86-NEXT: addl $20, %esp
147 ; X86-NEXT: popl %esi
148 ; X86-NEXT: popl %edi
149 ; X86-NEXT: popl %ebx
150 ; X86-NEXT: popl %ebp
152 %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
; vec_v4i32: checks the code generated for @llvm.sshl.sat.v4i32(%x, %y).
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
;   X64      - x86_64-linux with default features: the left shift is done as
;              a multiply (pslld $23 / paddd / cvttps2dq feeding pmuludq), the
;              result is shifted back with psrad and compared with %x.
;   X64-AVX2 - x86_64-linux with +avx2: vpsllvd, vpsravd back, vpcmpeqd against
;              %x, and vblendvps selects between the shifted value and the
;              2147483648 / 2147483647 constants.
;   X86      - i686 with cmov: each of the four elements is handled with a
;              scalar shll/sarl/cmpl/cmovel sequence and stored through a
;              pointer loaded from the stack.
156 define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
157 ; X64-LABEL: vec_v4i32:
159 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
160 ; X64-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[2,3,3,3,4,5,6,7]
161 ; X64-NEXT: pshuflw {{.*#+}} xmm5 = xmm1[0,1,1,1,4,5,6,7]
162 ; X64-NEXT: pslld $23, %xmm1
163 ; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
164 ; X64-NEXT: cvttps2dq %xmm1, %xmm6
165 ; X64-NEXT: movdqa %xmm0, %xmm1
166 ; X64-NEXT: pmuludq %xmm6, %xmm1
167 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
168 ; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
169 ; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
170 ; X64-NEXT: pmuludq %xmm7, %xmm6
171 ; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
172 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1]
173 ; X64-NEXT: pshuflw {{.*#+}} xmm6 = xmm3[2,3,3,3,4,5,6,7]
174 ; X64-NEXT: movdqa %xmm2, %xmm7
175 ; X64-NEXT: psrad %xmm6, %xmm7
176 ; X64-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,1,1,4,5,6,7]
177 ; X64-NEXT: movdqa %xmm1, %xmm6
178 ; X64-NEXT: psrad %xmm3, %xmm6
179 ; X64-NEXT: punpckhqdq {{.*#+}} xmm6 = xmm6[1],xmm7[1]
180 ; X64-NEXT: movdqa %xmm2, %xmm3
181 ; X64-NEXT: psrad %xmm4, %xmm3
182 ; X64-NEXT: psrad %xmm5, %xmm1
183 ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
184 ; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm6[0,3]
185 ; X64-NEXT: pcmpeqd %xmm0, %xmm1
186 ; X64-NEXT: pand %xmm1, %xmm2
187 ; X64-NEXT: pxor %xmm3, %xmm3
188 ; X64-NEXT: pcmpgtd %xmm0, %xmm3
189 ; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
190 ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
191 ; X64-NEXT: por %xmm3, %xmm0
192 ; X64-NEXT: pandn %xmm0, %xmm1
193 ; X64-NEXT: por %xmm2, %xmm1
194 ; X64-NEXT: movdqa %xmm1, %xmm0
197 ; X64-AVX2-LABEL: vec_v4i32:
199 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
200 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
201 ; X64-AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm2
202 ; X64-AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm3
203 ; X64-AVX2-NEXT: vpsravd %xmm1, %xmm3, %xmm1
204 ; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
205 ; X64-AVX2-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
206 ; X64-AVX2-NEXT: retq
208 ; X86-LABEL: vec_v4i32:
210 ; X86-NEXT: pushl %ebp
211 ; X86-NEXT: pushl %ebx
212 ; X86-NEXT: pushl %edi
213 ; X86-NEXT: pushl %esi
214 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
215 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
216 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
217 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
218 ; X86-NEXT: movl %edi, %edx
219 ; X86-NEXT: shll %cl, %edx
220 ; X86-NEXT: movl %edx, %ebp
221 ; X86-NEXT: sarl %cl, %ebp
222 ; X86-NEXT: xorl %ebx, %ebx
223 ; X86-NEXT: testl %edi, %edi
225 ; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
226 ; X86-NEXT: cmpl %ebp, %edi
227 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
228 ; X86-NEXT: cmovel %edx, %ebx
229 ; X86-NEXT: movl %edi, %ebp
230 ; X86-NEXT: movb %ch, %cl
231 ; X86-NEXT: shll %cl, %ebp
232 ; X86-NEXT: movl %ebp, %eax
233 ; X86-NEXT: sarl %cl, %eax
234 ; X86-NEXT: xorl %edx, %edx
235 ; X86-NEXT: testl %edi, %edi
237 ; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
238 ; X86-NEXT: cmpl %eax, %edi
239 ; X86-NEXT: cmovel %ebp, %edx
240 ; X86-NEXT: movl %esi, %edi
241 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
242 ; X86-NEXT: shll %cl, %edi
243 ; X86-NEXT: movl %edi, %ebp
244 ; X86-NEXT: sarl %cl, %ebp
245 ; X86-NEXT: xorl %eax, %eax
246 ; X86-NEXT: testl %esi, %esi
248 ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
249 ; X86-NEXT: cmpl %ebp, %esi
250 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
251 ; X86-NEXT: cmovel %edi, %eax
252 ; X86-NEXT: movl %esi, %edi
253 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
254 ; X86-NEXT: shll %cl, %edi
255 ; X86-NEXT: movl %edi, %ebp
256 ; X86-NEXT: sarl %cl, %ebp
257 ; X86-NEXT: xorl %ecx, %ecx
258 ; X86-NEXT: testl %esi, %esi
260 ; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
261 ; X86-NEXT: cmpl %ebp, %esi
262 ; X86-NEXT: cmovel %edi, %ecx
263 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
264 ; X86-NEXT: movl %ecx, 12(%esi)
265 ; X86-NEXT: movl %eax, 8(%esi)
266 ; X86-NEXT: movl %edx, 4(%esi)
267 ; X86-NEXT: movl %ebx, (%esi)
268 ; X86-NEXT: movl %esi, %eax
269 ; X86-NEXT: popl %esi
270 ; X86-NEXT: popl %edi
271 ; X86-NEXT: popl %ebx
272 ; X86-NEXT: popl %ebp
274 %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
; vec_v8i16: checks the code generated for @llvm.sshl.sat.v8i16(%x, %y).
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
;   X64      - x86_64-linux with default features: the left shift is done as
;              a pmullw by a computed per-lane multiplier; the shift back is
;              built from conditional psraw $8/$4/$2/$1 steps and compared
;              with %x via pcmpeqw.
;   X64-AVX2 - x86_64-linux with +avx2: operands are widened to 32-bit lanes
;              in ymm registers (vpmovzxwd/vpmovsxwd), shifted with
;              vpsllvd/vpsravd, packed back to 16-bit lanes, and vpblendvb
;              selects the final value (32767 is one of the blend constants).
;   X86      - i686 with cmov: each of the eight elements is handled with a
;              scalar shll/movswl/sarl/cmpw/cmovel sequence and stored with
;              movw through a pointer loaded from the stack.
278 define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
279 ; X64-LABEL: vec_v8i16:
281 ; X64-NEXT: movdqa %xmm1, %xmm2
282 ; X64-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
283 ; X64-NEXT: pslld $23, %xmm2
284 ; X64-NEXT: movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
285 ; X64-NEXT: paddd %xmm3, %xmm2
286 ; X64-NEXT: cvttps2dq %xmm2, %xmm2
287 ; X64-NEXT: pslld $16, %xmm2
288 ; X64-NEXT: psrad $16, %xmm2
289 ; X64-NEXT: movdqa %xmm1, %xmm4
290 ; X64-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3]
291 ; X64-NEXT: pslld $23, %xmm4
292 ; X64-NEXT: paddd %xmm3, %xmm4
293 ; X64-NEXT: cvttps2dq %xmm4, %xmm3
294 ; X64-NEXT: pslld $16, %xmm3
295 ; X64-NEXT: psrad $16, %xmm3
296 ; X64-NEXT: packssdw %xmm2, %xmm3
297 ; X64-NEXT: pmullw %xmm0, %xmm3
298 ; X64-NEXT: psllw $12, %xmm1
299 ; X64-NEXT: movdqa %xmm1, %xmm2
300 ; X64-NEXT: psraw $15, %xmm2
301 ; X64-NEXT: movdqa %xmm3, %xmm4
302 ; X64-NEXT: psraw $8, %xmm4
303 ; X64-NEXT: pand %xmm2, %xmm4
304 ; X64-NEXT: pandn %xmm3, %xmm2
305 ; X64-NEXT: por %xmm4, %xmm2
306 ; X64-NEXT: paddw %xmm1, %xmm1
307 ; X64-NEXT: movdqa %xmm1, %xmm4
308 ; X64-NEXT: psraw $15, %xmm4
309 ; X64-NEXT: movdqa %xmm4, %xmm5
310 ; X64-NEXT: pandn %xmm2, %xmm5
311 ; X64-NEXT: psraw $4, %xmm2
312 ; X64-NEXT: pand %xmm4, %xmm2
313 ; X64-NEXT: por %xmm5, %xmm2
314 ; X64-NEXT: paddw %xmm1, %xmm1
315 ; X64-NEXT: movdqa %xmm1, %xmm4
316 ; X64-NEXT: psraw $15, %xmm4
317 ; X64-NEXT: movdqa %xmm4, %xmm5
318 ; X64-NEXT: pandn %xmm2, %xmm5
319 ; X64-NEXT: psraw $2, %xmm2
320 ; X64-NEXT: pand %xmm4, %xmm2
321 ; X64-NEXT: por %xmm5, %xmm2
322 ; X64-NEXT: paddw %xmm1, %xmm1
323 ; X64-NEXT: psraw $15, %xmm1
324 ; X64-NEXT: movdqa %xmm1, %xmm4
325 ; X64-NEXT: pandn %xmm2, %xmm4
326 ; X64-NEXT: psraw $1, %xmm2
327 ; X64-NEXT: pand %xmm1, %xmm2
328 ; X64-NEXT: por %xmm4, %xmm2
329 ; X64-NEXT: pcmpeqw %xmm0, %xmm2
330 ; X64-NEXT: pand %xmm2, %xmm3
331 ; X64-NEXT: pxor %xmm1, %xmm1
332 ; X64-NEXT: pcmpgtw %xmm0, %xmm1
333 ; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
334 ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
335 ; X64-NEXT: por %xmm1, %xmm0
336 ; X64-NEXT: pandn %xmm0, %xmm2
337 ; X64-NEXT: por %xmm3, %xmm2
338 ; X64-NEXT: movdqa %xmm2, %xmm0
341 ; X64-AVX2-LABEL: vec_v8i16:
343 ; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
344 ; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
345 ; X64-AVX2-NEXT: vpsllvd %ymm1, %ymm2, %ymm2
346 ; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
347 ; X64-AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
348 ; X64-AVX2-NEXT: vpmovsxwd %xmm2, %ymm3
349 ; X64-AVX2-NEXT: vpsravd %ymm1, %ymm3, %ymm1
350 ; X64-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
351 ; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
352 ; X64-AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
353 ; X64-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
354 ; X64-AVX2-NEXT: vpcmpgtw %xmm0, %xmm3, %xmm0
355 ; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm3 = [32767,32767,32767,32767,32767,32767,32767,32767]
356 ; X64-AVX2-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
357 ; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
358 ; X64-AVX2-NEXT: vzeroupper
359 ; X64-AVX2-NEXT: retq
361 ; X86-LABEL: vec_v8i16:
363 ; X86-NEXT: pushl %ebp
364 ; X86-NEXT: pushl %ebx
365 ; X86-NEXT: pushl %edi
366 ; X86-NEXT: pushl %esi
367 ; X86-NEXT: subl $16, %esp
368 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
369 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
370 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
371 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
372 ; X86-NEXT: movl %edi, %ebx
373 ; X86-NEXT: shll %cl, %ebx
374 ; X86-NEXT: movswl %bx, %ebp
375 ; X86-NEXT: sarl %cl, %ebp
376 ; X86-NEXT: xorl %ecx, %ecx
377 ; X86-NEXT: testw %di, %di
379 ; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF
380 ; X86-NEXT: cmpw %bp, %di
381 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
382 ; X86-NEXT: cmovel %ebx, %ecx
383 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
384 ; X86-NEXT: movl %esi, %edi
385 ; X86-NEXT: movl %eax, %ecx
386 ; X86-NEXT: shll %cl, %edi
387 ; X86-NEXT: movswl %di, %ebx
388 ; X86-NEXT: sarl %cl, %ebx
389 ; X86-NEXT: xorl %eax, %eax
390 ; X86-NEXT: testw %si, %si
392 ; X86-NEXT: addl $32767, %eax # imm = 0x7FFF
393 ; X86-NEXT: cmpw %bx, %si
394 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
395 ; X86-NEXT: cmovel %edi, %eax
396 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
397 ; X86-NEXT: movl %edx, %esi
398 ; X86-NEXT: shll %cl, %esi
399 ; X86-NEXT: movswl %si, %edi
400 ; X86-NEXT: sarl %cl, %edi
401 ; X86-NEXT: xorl %eax, %eax
402 ; X86-NEXT: testw %dx, %dx
404 ; X86-NEXT: addl $32767, %eax # imm = 0x7FFF
405 ; X86-NEXT: cmpw %di, %dx
406 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
407 ; X86-NEXT: cmovel %esi, %eax
408 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
409 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
410 ; X86-NEXT: movl %eax, %edx
411 ; X86-NEXT: shll %cl, %edx
412 ; X86-NEXT: movswl %dx, %esi
413 ; X86-NEXT: sarl %cl, %esi
414 ; X86-NEXT: xorl %ebx, %ebx
415 ; X86-NEXT: testw %ax, %ax
417 ; X86-NEXT: addl $32767, %ebx # imm = 0x7FFF
418 ; X86-NEXT: cmpw %si, %ax
419 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
420 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
421 ; X86-NEXT: cmovel %edx, %ebx
422 ; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
423 ; X86-NEXT: movl %eax, %edx
424 ; X86-NEXT: shll %cl, %edx
425 ; X86-NEXT: movswl %dx, %esi
426 ; X86-NEXT: sarl %cl, %esi
427 ; X86-NEXT: xorl %ecx, %ecx
428 ; X86-NEXT: testw %ax, %ax
430 ; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF
431 ; X86-NEXT: cmpw %si, %ax
432 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
433 ; X86-NEXT: cmovel %edx, %ecx
434 ; X86-NEXT: movl %ecx, %ebp
435 ; X86-NEXT: movl %eax, %edx
436 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
437 ; X86-NEXT: shll %cl, %edx
438 ; X86-NEXT: movswl %dx, %esi
439 ; X86-NEXT: sarl %cl, %esi
440 ; X86-NEXT: xorl %ebx, %ebx
441 ; X86-NEXT: testw %ax, %ax
443 ; X86-NEXT: addl $32767, %ebx # imm = 0x7FFF
444 ; X86-NEXT: cmpw %si, %ax
445 ; X86-NEXT: cmovel %edx, %ebx
446 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
447 ; X86-NEXT: movl %eax, %esi
448 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
449 ; X86-NEXT: shll %cl, %esi
450 ; X86-NEXT: movswl %si, %edi
451 ; X86-NEXT: sarl %cl, %edi
452 ; X86-NEXT: xorl %edx, %edx
453 ; X86-NEXT: testw %ax, %ax
455 ; X86-NEXT: addl $32767, %edx # imm = 0x7FFF
456 ; X86-NEXT: cmpw %di, %ax
457 ; X86-NEXT: cmovel %esi, %edx
458 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
459 ; X86-NEXT: movl %eax, %esi
460 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
461 ; X86-NEXT: shll %cl, %esi
462 ; X86-NEXT: movswl %si, %edi
463 ; X86-NEXT: sarl %cl, %edi
464 ; X86-NEXT: xorl %ecx, %ecx
465 ; X86-NEXT: testw %ax, %ax
467 ; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF
468 ; X86-NEXT: cmpw %di, %ax
469 ; X86-NEXT: cmovel %esi, %ecx
470 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
471 ; X86-NEXT: movw %cx, 14(%eax)
472 ; X86-NEXT: movw %dx, 12(%eax)
473 ; X86-NEXT: movw %bx, 10(%eax)
474 ; X86-NEXT: movw %bp, 8(%eax)
475 ; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
476 ; X86-NEXT: movw %cx, 6(%eax)
477 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
478 ; X86-NEXT: movw %cx, 4(%eax)
479 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
480 ; X86-NEXT: movw %cx, 2(%eax)
481 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
482 ; X86-NEXT: movw %cx, (%eax)
483 ; X86-NEXT: addl $16, %esp
484 ; X86-NEXT: popl %esi
485 ; X86-NEXT: popl %edi
486 ; X86-NEXT: popl %ebx
487 ; X86-NEXT: popl %ebp
489 %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
493 define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
494 ; X64-LABEL: vec_v16i8:
496 ; X64-NEXT: psllw $5, %xmm1
497 ; X64-NEXT: pxor %xmm3, %xmm3
498 ; X64-NEXT: pxor %xmm4, %xmm4
499 ; X64-NEXT: pcmpgtb %xmm1, %xmm4
500 ; X64-NEXT: movdqa %xmm0, %xmm2
501 ; X64-NEXT: psllw $4, %xmm2
502 ; X64-NEXT: pand %xmm4, %xmm2
503 ; X64-NEXT: pandn %xmm0, %xmm4
504 ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
505 ; X64-NEXT: por %xmm4, %xmm2
506 ; X64-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
507 ; X64-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
508 ; X64-NEXT: paddb %xmm1, %xmm1
509 ; X64-NEXT: pxor %xmm6, %xmm6
510 ; X64-NEXT: pcmpgtb %xmm1, %xmm6
511 ; X64-NEXT: movdqa %xmm6, %xmm7
512 ; X64-NEXT: pandn %xmm2, %xmm7
513 ; X64-NEXT: psllw $2, %xmm2
514 ; X64-NEXT: pand %xmm6, %xmm2
515 ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
516 ; X64-NEXT: por %xmm7, %xmm2
517 ; X64-NEXT: paddb %xmm1, %xmm1
518 ; X64-NEXT: pxor %xmm6, %xmm6
519 ; X64-NEXT: pcmpgtb %xmm1, %xmm6
520 ; X64-NEXT: movdqa %xmm6, %xmm1
521 ; X64-NEXT: pandn %xmm2, %xmm1
522 ; X64-NEXT: paddb %xmm2, %xmm2
523 ; X64-NEXT: pand %xmm6, %xmm2
524 ; X64-NEXT: por %xmm1, %xmm2
525 ; X64-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm2[8],xmm6[9],xmm2[9],xmm6[10],xmm2[10],xmm6[11],xmm2[11],xmm6[12],xmm2[12],xmm6[13],xmm2[13],xmm6[14],xmm2[14],xmm6[15],xmm2[15]
526 ; X64-NEXT: pxor %xmm1, %xmm1
527 ; X64-NEXT: pcmpgtw %xmm4, %xmm1
528 ; X64-NEXT: movdqa %xmm1, %xmm7
529 ; X64-NEXT: pandn %xmm6, %xmm7
530 ; X64-NEXT: psraw $4, %xmm6
531 ; X64-NEXT: pand %xmm1, %xmm6
532 ; X64-NEXT: por %xmm7, %xmm6
533 ; X64-NEXT: paddw %xmm4, %xmm4
534 ; X64-NEXT: pxor %xmm1, %xmm1
535 ; X64-NEXT: pcmpgtw %xmm4, %xmm1
536 ; X64-NEXT: movdqa %xmm1, %xmm7
537 ; X64-NEXT: pandn %xmm6, %xmm7
538 ; X64-NEXT: psraw $2, %xmm6
539 ; X64-NEXT: pand %xmm1, %xmm6
540 ; X64-NEXT: por %xmm7, %xmm6
541 ; X64-NEXT: paddw %xmm4, %xmm4
542 ; X64-NEXT: pxor %xmm1, %xmm1
543 ; X64-NEXT: pcmpgtw %xmm4, %xmm1
544 ; X64-NEXT: movdqa %xmm1, %xmm4
545 ; X64-NEXT: pandn %xmm6, %xmm4
546 ; X64-NEXT: psraw $1, %xmm6
547 ; X64-NEXT: pand %xmm1, %xmm6
548 ; X64-NEXT: por %xmm4, %xmm6
549 ; X64-NEXT: psrlw $8, %xmm6
550 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
551 ; X64-NEXT: pxor %xmm4, %xmm4
552 ; X64-NEXT: pcmpgtw %xmm5, %xmm4
553 ; X64-NEXT: movdqa %xmm4, %xmm7
554 ; X64-NEXT: pandn %xmm1, %xmm7
555 ; X64-NEXT: psraw $4, %xmm1
556 ; X64-NEXT: pand %xmm4, %xmm1
557 ; X64-NEXT: por %xmm7, %xmm1
558 ; X64-NEXT: paddw %xmm5, %xmm5
559 ; X64-NEXT: pxor %xmm4, %xmm4
560 ; X64-NEXT: pcmpgtw %xmm5, %xmm4
561 ; X64-NEXT: movdqa %xmm4, %xmm7
562 ; X64-NEXT: pandn %xmm1, %xmm7
563 ; X64-NEXT: psraw $2, %xmm1
564 ; X64-NEXT: pand %xmm4, %xmm1
565 ; X64-NEXT: por %xmm7, %xmm1
566 ; X64-NEXT: paddw %xmm5, %xmm5
567 ; X64-NEXT: pxor %xmm4, %xmm4
568 ; X64-NEXT: pcmpgtw %xmm5, %xmm4
569 ; X64-NEXT: movdqa %xmm4, %xmm5
570 ; X64-NEXT: pandn %xmm1, %xmm5
571 ; X64-NEXT: psraw $1, %xmm1
572 ; X64-NEXT: pand %xmm4, %xmm1
573 ; X64-NEXT: por %xmm5, %xmm1
574 ; X64-NEXT: psrlw $8, %xmm1
575 ; X64-NEXT: packuswb %xmm6, %xmm1
576 ; X64-NEXT: pcmpeqb %xmm0, %xmm1
577 ; X64-NEXT: pand %xmm1, %xmm2
578 ; X64-NEXT: pcmpgtb %xmm0, %xmm3
579 ; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
580 ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
581 ; X64-NEXT: por %xmm3, %xmm0
582 ; X64-NEXT: pandn %xmm0, %xmm1
583 ; X64-NEXT: por %xmm2, %xmm1
584 ; X64-NEXT: movdqa %xmm1, %xmm0
587 ; X64-AVX2-LABEL: vec_v16i8:
589 ; X64-AVX2-NEXT: vpsllw $5, %xmm1, %xmm1
590 ; X64-AVX2-NEXT: vpsllw $4, %xmm0, %xmm2
591 ; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
592 ; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm2
593 ; X64-AVX2-NEXT: vpsllw $2, %xmm2, %xmm3
594 ; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
595 ; X64-AVX2-NEXT: vpaddb %xmm1, %xmm1, %xmm4
596 ; X64-AVX2-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
597 ; X64-AVX2-NEXT: vpaddb %xmm2, %xmm2, %xmm3
598 ; X64-AVX2-NEXT: vpaddb %xmm4, %xmm4, %xmm4
599 ; X64-AVX2-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
600 ; X64-AVX2-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
601 ; X64-AVX2-NEXT: vpsraw $4, %xmm3, %xmm4
602 ; X64-AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
603 ; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
604 ; X64-AVX2-NEXT: vpsraw $2, %xmm3, %xmm4
605 ; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm5
606 ; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
607 ; X64-AVX2-NEXT: vpsraw $1, %xmm3, %xmm4
608 ; X64-AVX2-NEXT: vpaddw %xmm5, %xmm5, %xmm5
609 ; X64-AVX2-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
610 ; X64-AVX2-NEXT: vpsrlw $8, %xmm3, %xmm3
611 ; X64-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
612 ; X64-AVX2-NEXT: vpsraw $4, %xmm4, %xmm5
613 ; X64-AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
614 ; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm4
615 ; X64-AVX2-NEXT: vpsraw $2, %xmm4, %xmm5
616 ; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
617 ; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm4
618 ; X64-AVX2-NEXT: vpsraw $1, %xmm4, %xmm5
619 ; X64-AVX2-NEXT: vpaddw %xmm1, %xmm1, %xmm1
620 ; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm5, %xmm4, %xmm1
621 ; X64-AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
622 ; X64-AVX2-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
623 ; X64-AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
624 ; X64-AVX2-NEXT: vpbroadcastb {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
625 ; X64-AVX2-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm0
626 ; X64-AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
627 ; X64-AVX2-NEXT: retq
629 ; X86-LABEL: vec_v16i8:
631 ; X86-NEXT: pushl %ebp
632 ; X86-NEXT: pushl %ebx
633 ; X86-NEXT: pushl %edi
634 ; X86-NEXT: pushl %esi
635 ; X86-NEXT: subl $44, %esp
636 ; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
637 ; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
638 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
639 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
640 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
641 ; X86-NEXT: movb %ch, %bh
642 ; X86-NEXT: shlb %cl, %bh
643 ; X86-NEXT: movzbl %bh, %esi
644 ; X86-NEXT: sarb %cl, %bh
645 ; X86-NEXT: xorl %eax, %eax
646 ; X86-NEXT: testb %ch, %ch
648 ; X86-NEXT: addl $127, %eax
649 ; X86-NEXT: cmpb %bh, %ch
650 ; X86-NEXT: cmovel %esi, %eax
651 ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
652 ; X86-NEXT: movl %ebx, %eax
653 ; X86-NEXT: movl %edx, %ecx
654 ; X86-NEXT: shlb %cl, %al
655 ; X86-NEXT: movzbl %al, %esi
656 ; X86-NEXT: sarb %cl, %al
657 ; X86-NEXT: xorl %ecx, %ecx
658 ; X86-NEXT: testb %bl, %bl
660 ; X86-NEXT: addl $127, %ecx
661 ; X86-NEXT: cmpb %al, %bl
662 ; X86-NEXT: cmovel %esi, %ecx
663 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
664 ; X86-NEXT: movb %dh, %al
665 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
666 ; X86-NEXT: shlb %cl, %al
667 ; X86-NEXT: movzbl %al, %esi
668 ; X86-NEXT: sarb %cl, %al
669 ; X86-NEXT: xorl %ecx, %ecx
670 ; X86-NEXT: testb %dh, %dh
672 ; X86-NEXT: addl $127, %ecx
673 ; X86-NEXT: cmpb %al, %dh
674 ; X86-NEXT: cmovel %esi, %ecx
675 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
676 ; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
677 ; X86-NEXT: movb %ah, %al
678 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
679 ; X86-NEXT: shlb %cl, %al
680 ; X86-NEXT: movzbl %al, %esi
681 ; X86-NEXT: sarb %cl, %al
682 ; X86-NEXT: xorl %edx, %edx
683 ; X86-NEXT: testb %ah, %ah
685 ; X86-NEXT: addl $127, %edx
686 ; X86-NEXT: cmpb %al, %ah
687 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
688 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
689 ; X86-NEXT: cmovel %esi, %edx
690 ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
691 ; X86-NEXT: movl %eax, %edx
692 ; X86-NEXT: shlb %cl, %dl
693 ; X86-NEXT: movzbl %dl, %esi
694 ; X86-NEXT: sarb %cl, %dl
695 ; X86-NEXT: xorl %ecx, %ecx
696 ; X86-NEXT: testb %al, %al
698 ; X86-NEXT: addl $127, %ecx
699 ; X86-NEXT: cmpb %dl, %al
700 ; X86-NEXT: cmovel %esi, %ecx
701 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
702 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
703 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
704 ; X86-NEXT: movl %eax, %edx
705 ; X86-NEXT: shlb %cl, %dl
706 ; X86-NEXT: movzbl %dl, %esi
707 ; X86-NEXT: sarb %cl, %dl
708 ; X86-NEXT: xorl %ecx, %ecx
709 ; X86-NEXT: testb %al, %al
711 ; X86-NEXT: addl $127, %ecx
712 ; X86-NEXT: cmpb %dl, %al
713 ; X86-NEXT: cmovel %esi, %ecx
714 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
715 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
716 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
717 ; X86-NEXT: movl %eax, %edx
718 ; X86-NEXT: shlb %cl, %dl
719 ; X86-NEXT: movzbl %dl, %esi
720 ; X86-NEXT: sarb %cl, %dl
721 ; X86-NEXT: xorl %ecx, %ecx
722 ; X86-NEXT: testb %al, %al
724 ; X86-NEXT: addl $127, %ecx
725 ; X86-NEXT: cmpb %dl, %al
726 ; X86-NEXT: cmovel %esi, %ecx
727 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
728 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
729 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
730 ; X86-NEXT: movl %eax, %edx
731 ; X86-NEXT: shlb %cl, %dl
732 ; X86-NEXT: movzbl %dl, %esi
733 ; X86-NEXT: sarb %cl, %dl
734 ; X86-NEXT: xorl %ecx, %ecx
735 ; X86-NEXT: testb %al, %al
737 ; X86-NEXT: addl $127, %ecx
738 ; X86-NEXT: cmpb %dl, %al
739 ; X86-NEXT: cmovel %esi, %ecx
740 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
741 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
742 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
743 ; X86-NEXT: movl %eax, %edx
744 ; X86-NEXT: shlb %cl, %dl
745 ; X86-NEXT: movzbl %dl, %esi
746 ; X86-NEXT: sarb %cl, %dl
747 ; X86-NEXT: xorl %ecx, %ecx
748 ; X86-NEXT: testb %al, %al
750 ; X86-NEXT: addl $127, %ecx
751 ; X86-NEXT: cmpb %dl, %al
752 ; X86-NEXT: cmovel %esi, %ecx
753 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
754 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
755 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
756 ; X86-NEXT: movl %eax, %edx
757 ; X86-NEXT: shlb %cl, %dl
758 ; X86-NEXT: movzbl %dl, %esi
759 ; X86-NEXT: sarb %cl, %dl
760 ; X86-NEXT: xorl %ecx, %ecx
761 ; X86-NEXT: testb %al, %al
763 ; X86-NEXT: addl $127, %ecx
764 ; X86-NEXT: cmpb %dl, %al
765 ; X86-NEXT: cmovel %esi, %ecx
766 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
767 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
768 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
769 ; X86-NEXT: movl %eax, %edx
770 ; X86-NEXT: shlb %cl, %dl
771 ; X86-NEXT: movzbl %dl, %esi
772 ; X86-NEXT: sarb %cl, %dl
773 ; X86-NEXT: xorl %ecx, %ecx
774 ; X86-NEXT: testb %al, %al
776 ; X86-NEXT: addl $127, %ecx
777 ; X86-NEXT: cmpb %dl, %al
778 ; X86-NEXT: cmovel %esi, %ecx
779 ; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
780 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
781 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
782 ; X86-NEXT: movl %eax, %edx
783 ; X86-NEXT: shlb %cl, %dl
784 ; X86-NEXT: movzbl %dl, %esi
785 ; X86-NEXT: sarb %cl, %dl
786 ; X86-NEXT: xorl %ecx, %ecx
787 ; X86-NEXT: testb %al, %al
789 ; X86-NEXT: addl $127, %ecx
790 ; X86-NEXT: cmpb %dl, %al
791 ; X86-NEXT: cmovel %esi, %ecx
792 ; X86-NEXT: movl %ecx, %ebp
793 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
794 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
795 ; X86-NEXT: movl %eax, %edx
796 ; X86-NEXT: shlb %cl, %dl
797 ; X86-NEXT: movzbl %dl, %esi
798 ; X86-NEXT: sarb %cl, %dl
799 ; X86-NEXT: xorl %ecx, %ecx
800 ; X86-NEXT: testb %al, %al
802 ; X86-NEXT: addl $127, %ecx
803 ; X86-NEXT: cmpb %dl, %al
804 ; X86-NEXT: cmovel %esi, %ecx
805 ; X86-NEXT: movl %ecx, %edi
806 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
807 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
808 ; X86-NEXT: movl %eax, %edx
809 ; X86-NEXT: shlb %cl, %dl
810 ; X86-NEXT: movzbl %dl, %esi
811 ; X86-NEXT: sarb %cl, %dl
812 ; X86-NEXT: xorl %ebx, %ebx
813 ; X86-NEXT: testb %al, %al
815 ; X86-NEXT: addl $127, %ebx
816 ; X86-NEXT: cmpb %dl, %al
817 ; X86-NEXT: cmovel %esi, %ebx
818 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
819 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
820 ; X86-NEXT: movb %al, %ah
821 ; X86-NEXT: shlb %cl, %ah
822 ; X86-NEXT: movzbl %ah, %esi
823 ; X86-NEXT: sarb %cl, %ah
824 ; X86-NEXT: xorl %edx, %edx
825 ; X86-NEXT: testb %al, %al
827 ; X86-NEXT: addl $127, %edx
828 ; X86-NEXT: cmpb %ah, %al
829 ; X86-NEXT: cmovel %esi, %edx
830 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
831 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
832 ; X86-NEXT: movb %al, %ah
833 ; X86-NEXT: shlb %cl, %ah
834 ; X86-NEXT: movzbl %ah, %esi
835 ; X86-NEXT: sarb %cl, %ah
836 ; X86-NEXT: xorl %ecx, %ecx
837 ; X86-NEXT: testb %al, %al
839 ; X86-NEXT: addl $127, %ecx
840 ; X86-NEXT: cmpb %ah, %al
841 ; X86-NEXT: cmovel %esi, %ecx
842 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
843 ; X86-NEXT: movb %cl, 15(%eax)
844 ; X86-NEXT: movb %dl, 14(%eax)
845 ; X86-NEXT: movb %bl, 13(%eax)
846 ; X86-NEXT: movl %edi, %ecx
847 ; X86-NEXT: movb %cl, 12(%eax)
848 ; X86-NEXT: movl %ebp, %ecx
849 ; X86-NEXT: movb %cl, 11(%eax)
850 ; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
851 ; X86-NEXT: movb %cl, 10(%eax)
852 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
853 ; X86-NEXT: movb %cl, 9(%eax)
854 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
855 ; X86-NEXT: movb %cl, 8(%eax)
856 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
857 ; X86-NEXT: movb %cl, 7(%eax)
858 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
859 ; X86-NEXT: movb %cl, 6(%eax)
860 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
861 ; X86-NEXT: movb %cl, 5(%eax)
862 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
863 ; X86-NEXT: movb %cl, 4(%eax)
864 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
865 ; X86-NEXT: movb %cl, 3(%eax)
866 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
867 ; X86-NEXT: movb %cl, 2(%eax)
868 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
869 ; X86-NEXT: movb %cl, 1(%eax)
870 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
871 ; X86-NEXT: movb %cl, (%eax)
872 ; X86-NEXT: addl $44, %esp
873 ; X86-NEXT: popl %esi
874 ; X86-NEXT: popl %edi
875 ; X86-NEXT: popl %ebx
876 ; X86-NEXT: popl %ebp
878 %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y)