1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
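
; Splat patterns below. A splat shift by a constant lowers to a single
; shift-by-immediate; a splat left shift by 1 lowers to an add of the
; vector with itself (paddd/paddw) rather than pslld/psllw.
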
7 define <4 x i32> @shl4(<4 x i32> %A) nounwind {
; X32-LABEL: shl4:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: pslld $2, %xmm1
; X32-NEXT: paddd %xmm0, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shl4:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: pslld $2, %xmm1
; X64-NEXT: paddd %xmm0, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

30 define <4 x i32> @shr4(<4 x i32> %A) nounwind {
; X32-LABEL: shr4:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrld $2, %xmm1
; X32-NEXT: psrld $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr4:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrld $2, %xmm1
; X64-NEXT: psrld $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

53 define <4 x i32> @sra4(<4 x i32> %A) nounwind {
; X32-LABEL: sra4:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrad $2, %xmm1
; X32-NEXT: psrad $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: sra4:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrad $2, %xmm1
; X64-NEXT: psrad $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

76 define <2 x i64> @shl2(<2 x i64> %A) nounwind {
; X32-LABEL: shl2:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psllq $2, %xmm1
; X32-NEXT: psllq $9, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shl2:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psllq $2, %xmm1
; X64-NEXT: psllq $9, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = shl <2 x i64> %A, < i64 2, i64 2>
  %C = shl <2 x i64> %A, < i64 9, i64 9>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

99 define <2 x i64> @shr2(<2 x i64> %A) nounwind {
; X32-LABEL: shr2:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrlq $8, %xmm1
; X32-NEXT: psrlq $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr2:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $8, %xmm1
; X64-NEXT: psrlq $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = lshr <2 x i64> %A, < i64 8, i64 8>
  %C = lshr <2 x i64> %A, < i64 1, i64 1>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

123 define <8 x i16> @shl8(<8 x i16> %A) nounwind {
; X32-LABEL: shl8:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psllw $2, %xmm1
; X32-NEXT: paddw %xmm0, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shl8:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psllw $2, %xmm1
; X64-NEXT: paddw %xmm0, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

146 define <8 x i16> @shr8(<8 x i16> %A) nounwind {
; X32-LABEL: shr8:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrlw $2, %xmm1
; X32-NEXT: psrlw $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr8:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $2, %xmm1
; X64-NEXT: psrlw $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

169 define <8 x i16> @sra8(<8 x i16> %A) nounwind {
; X32-LABEL: sra8:
; X32: # %bb.0: # %entry
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psraw $2, %xmm1
; X32-NEXT: psraw $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: sra8:
; X64: # %bb.0: # %entry
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psraw $2, %xmm1
; X64-NEXT: psraw $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

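; Non-splat shift amounts below. SSE2 has no per-element shift instructions,
; so an <8 x i16> left shift by mixed constants lowers to pmullw with a vector
; of power-of-two multipliers, and a non-splat <2 x i64> shift lowers to two
; shift-by-immediates whose halves are recombined with shufps.
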
195 define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
196 ; X32-LABEL: sll8_nosplat:
197 ; X32: # %bb.0: # %entry
198 ; X32-NEXT: movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
199 ; X32-NEXT: pmullw %xmm0, %xmm1
200 ; X32-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
204 ; X64-LABEL: sll8_nosplat:
205 ; X64: # %bb.0: # %entry
206 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
207 ; X64-NEXT: pmullw %xmm0, %xmm1
208 ; X64-NEXT: pmullw {{.*}}(%rip), %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = shl <8 x i16> %A, < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

219 define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
220 ; X32-LABEL: shr2_nosplat:
221 ; X32: # %bb.0: # %entry
222 ; X32-NEXT: movdqa %xmm0, %xmm2
223 ; X32-NEXT: psrlq $8, %xmm2
224 ; X32-NEXT: movdqa %xmm0, %xmm1
225 ; X32-NEXT: psrlq $1, %xmm1
226 ; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
227 ; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
228 ; X32-NEXT: xorps %xmm2, %xmm1
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
232 ; X64-LABEL: shr2_nosplat:
233 ; X64: # %bb.0: # %entry
234 ; X64-NEXT: movdqa %xmm0, %xmm2
235 ; X64-NEXT: psrlq $8, %xmm2
236 ; X64-NEXT: movdqa %xmm0, %xmm1
237 ; X64-NEXT: psrlq $1, %xmm1
238 ; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
239 ; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
240 ; X64-NEXT: xorps %xmm2, %xmm1
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = lshr <2 x i64> %A, < i64 8, i64 1>
  %C = lshr <2 x i64> %A, < i64 1, i64 0>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

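; Shifts of the illegal <2 x i32> type. The vector is promoted to <2 x i64>,
; so the splat lowerings above reappear as psllq/psrlq; logical right shifts
; first mask off the high bits of each widened element with pand.
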
253 define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
254 ; X32-LABEL: shl2_other:
255 ; X32: # %bb.0: # %entry
256 ; X32-NEXT: movdqa %xmm0, %xmm1
257 ; X32-NEXT: psllq $2, %xmm1
258 ; X32-NEXT: psllq $9, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
262 ; X64-LABEL: shl2_other:
263 ; X64: # %bb.0: # %entry
264 ; X64-NEXT: movdqa %xmm0, %xmm1
265 ; X64-NEXT: psllq $2, %xmm1
266 ; X64-NEXT: psllq $9, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = shl <2 x i32> %A, < i32 2, i32 2>
  %C = shl <2 x i32> %A, < i32 9, i32 9>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

276 define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
277 ; X32-LABEL: shr2_other:
278 ; X32: # %bb.0: # %entry
279 ; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
280 ; X32-NEXT: movdqa %xmm0, %xmm1
281 ; X32-NEXT: psrlq $8, %xmm1
282 ; X32-NEXT: psrlq $1, %xmm0
; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: retl
;
286 ; X64-LABEL: shr2_other:
287 ; X64: # %bb.0: # %entry
288 ; X64-NEXT: pand {{.*}}(%rip), %xmm0
289 ; X64-NEXT: movdqa %xmm0, %xmm1
290 ; X64-NEXT: psrlq $8, %xmm1
291 ; X64-NEXT: psrlq $1, %xmm0
; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: retq
entry:
  %B = lshr <2 x i32> %A, < i32 8, i32 8>
  %C = lshr <2 x i32> %A, < i32 1, i32 1>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

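; <16 x i8> shifts. SSE2 has no byte-shift instruction: splat byte shifts
; lower to a word shift plus a mask, arithmetic byte shifts additionally
; need a sign fixup (pxor/psubb), and an arithmetic shift by 7 is simply a
; sign-bit splat via pcmpgtb.
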
301 define <16 x i8> @shl9(<16 x i8> %A) nounwind {
; X32-LABEL: shl9:
; X32: # %bb.0:
; X32-NEXT: psllw $3, %xmm0
; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shl9:
; X64: # %bb.0:
; X64-NEXT: psllw $3, %xmm0
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: retq
  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}

317 define <16 x i8> @shr9(<16 x i8> %A) nounwind {
; X32-LABEL: shr9:
; X32: # %bb.0:
; X32-NEXT: psrlw $3, %xmm0
; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr9:
; X64: # %bb.0:
; X64-NEXT: psrlw $3, %xmm0
; X64-NEXT: pand {{.*}}(%rip), %xmm0
; X64-NEXT: retq
  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}

333 define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
; X32-LABEL: sra_v16i8_7:
; X32: # %bb.0:
336 ; X32-NEXT: pxor %xmm1, %xmm1
337 ; X32-NEXT: pcmpgtb %xmm0, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: sra_v16i8_7:
; X64: # %bb.0:
343 ; X64-NEXT: pxor %xmm1, %xmm1
344 ; X64-NEXT: pcmpgtb %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %B
}

351 define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
; X32-LABEL: sra_v16i8:
; X32: # %bb.0:
354 ; X32-NEXT: psrlw $3, %xmm0
355 ; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
356 ; X32-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
357 ; X32-NEXT: pxor %xmm1, %xmm0
; X32-NEXT: psubb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: sra_v16i8:
; X64: # %bb.0:
363 ; X64-NEXT: psrlw $3, %xmm0
364 ; X64-NEXT: pand {{.*}}(%rip), %xmm0
365 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
366 ; X64-NEXT: pxor %xmm1, %xmm0
; X64-NEXT: psubb %xmm1, %xmm0
; X64-NEXT: retq
  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}