1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,MASK
3 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2,+fast-vector-shift-masks | FileCheck %s --check-prefixes=CHECK,SHIFT
4 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver1 | FileCheck %s --check-prefixes=CHECK,SHIFT
6 ; SSE2 Logical Shift Left
; Shift-left of every i16 lane by zero. The expectations visible here list no
; shift instruction for this case.
8 define <8 x i16> @test_sllw_1(<8 x i16> %InVec) {
9 ; CHECK-LABEL: test_sllw_1:
10 ; CHECK: # %bb.0: # %entry
13 %shl = shl <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
; Shift-left of every i16 lane by one. The expected code is an add of the
; register to itself (paddw) rather than a psllw.
17 define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
18 ; CHECK-LABEL: test_sllw_2:
19 ; CHECK: # %bb.0: # %entry
20 ; CHECK-NEXT: paddw %xmm0, %xmm0
23 %shl = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Shift-left of every i16 lane by 15 (element width minus one). Expected to
; select a single immediate-form psllw.
27 define <8 x i16> @test_sllw_3(<8 x i16> %InVec) {
28 ; CHECK-LABEL: test_sllw_3:
29 ; CHECK: # %bb.0: # %entry
30 ; CHECK-NEXT: psllw $15, %xmm0
33 %shl = shl <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; Shift-left of every i32 lane by zero. The expectations visible here list no
; shift instruction for this case.
37 define <4 x i32> @test_slld_1(<4 x i32> %InVec) {
38 ; CHECK-LABEL: test_slld_1:
39 ; CHECK: # %bb.0: # %entry
42 %shl = shl <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
; Shift-left of every i32 lane by one. The expected code is an add of the
; register to itself (paddd) rather than a pslld.
46 define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
47 ; CHECK-LABEL: test_slld_2:
48 ; CHECK: # %bb.0: # %entry
49 ; CHECK-NEXT: paddd %xmm0, %xmm0
52 %shl = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
; Shift-left of every i32 lane by 31 (element width minus one). Expected to
; select a single immediate-form pslld.
56 define <4 x i32> @test_slld_3(<4 x i32> %InVec) {
57 ; CHECK-LABEL: test_slld_3:
58 ; CHECK: # %bb.0: # %entry
59 ; CHECK-NEXT: pslld $31, %xmm0
62 %shl = shl <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
; Shift-left of every i64 lane by zero. The expectations visible here list no
; shift instruction for this case.
66 define <2 x i64> @test_sllq_1(<2 x i64> %InVec) {
67 ; CHECK-LABEL: test_sllq_1:
68 ; CHECK: # %bb.0: # %entry
71 %shl = shl <2 x i64> %InVec, <i64 0, i64 0>
; Shift-left of every i64 lane by one. The expected code is an add of the
; register to itself (paddq) rather than a psllq.
75 define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
76 ; CHECK-LABEL: test_sllq_2:
77 ; CHECK: # %bb.0: # %entry
78 ; CHECK-NEXT: paddq %xmm0, %xmm0
81 %shl = shl <2 x i64> %InVec, <i64 1, i64 1>
; Shift-left of every i64 lane by 63 (element width minus one). Expected to
; select a single immediate-form psllq.
85 define <2 x i64> @test_sllq_3(<2 x i64> %InVec) {
86 ; CHECK-LABEL: test_sllq_3:
87 ; CHECK: # %bb.0: # %entry
88 ; CHECK-NEXT: psllq $63, %xmm0
91 %shl = shl <2 x i64> %InVec, <i64 63, i64 63>
95 ; SSE2 Arithmetic Shift Right
; Arithmetic shift-right of every i16 lane by zero. The expectations visible
; here list no shift instruction for this case.
; Note: the result value is named %shl although the operation is ashr.
97 define <8 x i16> @test_sraw_1(<8 x i16> %InVec) {
98 ; CHECK-LABEL: test_sraw_1:
99 ; CHECK: # %bb.0: # %entry
102 %shl = ashr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
; Arithmetic shift-right of every i16 lane by one. Expected to select a single
; immediate-form psraw.
; Note: the result value is named %shl although the operation is ashr.
106 define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
107 ; CHECK-LABEL: test_sraw_2:
108 ; CHECK: # %bb.0: # %entry
109 ; CHECK-NEXT: psraw $1, %xmm0
112 %shl = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Arithmetic shift-right of every i16 lane by 15 (element width minus one).
; Expected to select a single immediate-form psraw.
; Note: the result value is named %shl although the operation is ashr.
116 define <8 x i16> @test_sraw_3(<8 x i16> %InVec) {
117 ; CHECK-LABEL: test_sraw_3:
118 ; CHECK: # %bb.0: # %entry
119 ; CHECK-NEXT: psraw $15, %xmm0
122 %shl = ashr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; Arithmetic shift-right of every i32 lane by zero. The expectations visible
; here list no shift instruction for this case.
; Note: the result value is named %shl although the operation is ashr.
126 define <4 x i32> @test_srad_1(<4 x i32> %InVec) {
127 ; CHECK-LABEL: test_srad_1:
128 ; CHECK: # %bb.0: # %entry
131 %shl = ashr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
; Arithmetic shift-right of every i32 lane by one. Expected to select a single
; immediate-form psrad.
; Note: the result value is named %shl although the operation is ashr.
135 define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
136 ; CHECK-LABEL: test_srad_2:
137 ; CHECK: # %bb.0: # %entry
138 ; CHECK-NEXT: psrad $1, %xmm0
141 %shl = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
; Arithmetic shift-right of every i32 lane by 31 (element width minus one).
; Expected to select a single immediate-form psrad.
; Note: the result value is named %shl although the operation is ashr.
145 define <4 x i32> @test_srad_3(<4 x i32> %InVec) {
146 ; CHECK-LABEL: test_srad_3:
147 ; CHECK: # %bb.0: # %entry
148 ; CHECK-NEXT: psrad $31, %xmm0
151 %shl = ashr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
155 ; SSE2 Logical Shift Right
; Logical shift-right of every i16 lane by zero. The expectations visible here
; list no shift instruction for this case.
; Note: the result value is named %shl although the operation is lshr.
157 define <8 x i16> @test_srlw_1(<8 x i16> %InVec) {
158 ; CHECK-LABEL: test_srlw_1:
159 ; CHECK: # %bb.0: # %entry
162 %shl = lshr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
; Logical shift-right of every i16 lane by one. Expected to select a single
; immediate-form psrlw.
; Note: the result value is named %shl although the operation is lshr.
166 define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
167 ; CHECK-LABEL: test_srlw_2:
168 ; CHECK: # %bb.0: # %entry
169 ; CHECK-NEXT: psrlw $1, %xmm0
172 %shl = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; Logical shift-right of every i16 lane by 15 (element width minus one).
; Expected to select a single immediate-form psrlw.
; Note: the result value is named %shl although the operation is lshr.
176 define <8 x i16> @test_srlw_3(<8 x i16> %InVec) {
177 ; CHECK-LABEL: test_srlw_3:
178 ; CHECK: # %bb.0: # %entry
179 ; CHECK-NEXT: psrlw $15, %xmm0
182 %shl = lshr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; Logical shift-right of every i32 lane by zero. The expectations visible here
; list no shift instruction for this case.
; Note: the result value is named %shl although the operation is lshr.
186 define <4 x i32> @test_srld_1(<4 x i32> %InVec) {
187 ; CHECK-LABEL: test_srld_1:
188 ; CHECK: # %bb.0: # %entry
191 %shl = lshr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
; Logical shift-right of every i32 lane by one. Expected to select a single
; immediate-form psrld.
; Note: the result value is named %shl although the operation is lshr.
195 define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
196 ; CHECK-LABEL: test_srld_2:
197 ; CHECK: # %bb.0: # %entry
198 ; CHECK-NEXT: psrld $1, %xmm0
201 %shl = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
; Logical shift-right of every i32 lane by 31 (element width minus one).
; Expected to select a single immediate-form psrld.
; Note: the result value is named %shl although the operation is lshr.
205 define <4 x i32> @test_srld_3(<4 x i32> %InVec) {
206 ; CHECK-LABEL: test_srld_3:
207 ; CHECK: # %bb.0: # %entry
208 ; CHECK-NEXT: psrld $31, %xmm0
211 %shl = lshr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
; Logical shift-right of every i64 lane by zero. The expectations visible here
; list no shift instruction for this case.
; Note: the result value is named %shl although the operation is lshr.
215 define <2 x i64> @test_srlq_1(<2 x i64> %InVec) {
216 ; CHECK-LABEL: test_srlq_1:
217 ; CHECK: # %bb.0: # %entry
220 %shl = lshr <2 x i64> %InVec, <i64 0, i64 0>
; Logical shift-right of every i64 lane by one. Expected to select a single
; immediate-form psrlq.
; Note: the result value is named %shl although the operation is lshr.
224 define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
225 ; CHECK-LABEL: test_srlq_2:
226 ; CHECK: # %bb.0: # %entry
227 ; CHECK-NEXT: psrlq $1, %xmm0
230 %shl = lshr <2 x i64> %InVec, <i64 1, i64 1>
; Logical shift-right of every i64 lane by 63 (element width minus one).
; Expected to select a single immediate-form psrlq.
; Note: the result value is named %shl although the operation is lshr.
234 define <2 x i64> @test_srlq_3(<2 x i64> %InVec) {
235 ; CHECK-LABEL: test_srlq_3:
236 ; CHECK: # %bb.0: # %entry
237 ; CHECK-NEXT: psrlq $63, %xmm0
240 %shl = lshr <2 x i64> %InVec, <i64 63, i64 63>
; Two chained arithmetic right shifts (by 2, then by 4). Expected to merge into
; one psrad whose immediate is the sum of the two amounts (6).
244 define <4 x i32> @sra_sra_v4i32(<4 x i32> %x) nounwind {
245 ; CHECK-LABEL: sra_sra_v4i32:
247 ; CHECK-NEXT: psrad $6, %xmm0
249 %sra0 = ashr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
250 %sra1 = ashr <4 x i32> %sra0, <i32 4, i32 4, i32 4, i32 4>
; Two chained logical right shifts (by 2, then by 4). Expected to merge into
; one psrld whose immediate is the sum of the two amounts (6).
254 define <4 x i32> @srl_srl_v4i32(<4 x i32> %x) nounwind {
255 ; CHECK-LABEL: srl_srl_v4i32:
257 ; CHECK-NEXT: psrld $6, %xmm0
259 %srl0 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
260 %srl1 = lshr <4 x i32> %srl0, <i32 4, i32 4, i32 4, i32 4>
; Logical right shift by 4 of a left shift by 4. The pair only clears the top
; four bits of each lane, and the expected code is a single andps with a
; RIP-relative memory operand instead of two shifts.
; Note: %srl0 holds the result of the shl, despite its name.
264 define <4 x i32> @srl_shl_v4i32(<4 x i32> %x) nounwind {
265 ; CHECK-LABEL: srl_shl_v4i32:
267 ; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
269 %srl0 = shl <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
270 %srl1 = lshr <4 x i32> %srl0, <i32 4, i32 4, i32 4, i32 4>
; Logical right shift by 31 of an arithmetic right shift by a variable amount
; %y. The outer shift keeps only the top bit of each lane, which an arithmetic
; right shift leaves unchanged; the expectation shown here is a psrld $31
; applied directly to %xmm0.
274 define <4 x i32> @srl_sra_31_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
275 ; CHECK-LABEL: srl_sra_31_v4i32:
277 ; CHECK-NEXT: psrld $31, %xmm0
279 %sra = ashr <4 x i32> %x, %y
280 %srl1 = lshr <4 x i32> %sra, <i32 31, i32 31, i32 31, i32 31>
; Two chained left shifts (by 2, then by 4). Expected to merge into one pslld
; whose immediate is the sum of the two amounts (6).
284 define <4 x i32> @shl_shl_v4i32(<4 x i32> %x) nounwind {
285 ; CHECK-LABEL: shl_shl_v4i32:
287 ; CHECK-NEXT: pslld $6, %xmm0
289 %shl0 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
290 %shl1 = shl <4 x i32> %shl0, <i32 4, i32 4, i32 4, i32 4>
; Left shift by 4 of an arithmetic right shift by 4. The pair only clears the
; low four bits of each lane, and the expected code is a single andps with a
; RIP-relative memory operand instead of two shifts.
; Note: %shl0 holds the result of the ashr, despite its name.
294 define <4 x i32> @shl_sra_v4i32(<4 x i32> %x) nounwind {
295 ; CHECK-LABEL: shl_sra_v4i32:
297 ; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
299 %shl0 = ashr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
300 %shl1 = shl <4 x i32> %shl0, <i32 4, i32 4, i32 4, i32 4>
; Left shift by 5 of a logical right shift by 2, with two sets of expectations
; that depend on the run configuration:
;  - plain +sse2: one left shift by the net amount (5 - 2 = 3) followed by a
;    pand with a RIP-relative memory operand;
;  - +fast-vector-shift-masks or -mcpu=btver1: the two shifts are kept as
;    written (psrld $2, then pslld $5).
; Note: %shl0 holds the result of the lshr, despite its name.
304 define <4 x i32> @shl_srl_v4i32(<4 x i32> %x) nounwind {
305 ; MASK-LABEL: shl_srl_v4i32:
307 ; MASK-NEXT: pslld $3, %xmm0
308 ; MASK-NEXT: pand {{.*}}(%rip), %xmm0
311 ; SHIFT-LABEL: shl_srl_v4i32:
313 ; SHIFT-NEXT: psrld $2, %xmm0
314 ; SHIFT-NEXT: pslld $5, %xmm0
316 %shl0 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
317 %shl1 = shl <4 x i32> %shl0, <i32 5, i32 5, i32 5, i32 5>
; Logical right shift by 2 on i16 lanes, zero-extension to i32, then left shift
; by 2. The equal and opposite shift amounts only clear the low two bits, so
; the expected code has no shift instructions: a pand with a RIP-relative
; memory operand, then a punpcklwd against a zeroed %xmm1 to widen the lanes.
321 define <4 x i32> @shl_zext_srl_v4i32(<4 x i16> %x) nounwind {
322 ; CHECK-LABEL: shl_zext_srl_v4i32:
324 ; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
325 ; CHECK-NEXT: pxor %xmm1, %xmm1
326 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
328 %srl = lshr <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
329 %zext = zext <4 x i16> %srl to <4 x i32>
330 %shl = shl <4 x i32> %zext, <i32 2, i32 2, i32 2, i32 2>
; Logical right shift by 16 on i32 lanes, truncation to i16, then arithmetic
; right shift by 3. Expected to become one psrad on the wide type by the
; combined amount (16 + 3 = 19), followed by packssdw to narrow the lanes.
334 define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
335 ; CHECK-LABEL: sra_trunc_srl_v4i32:
337 ; CHECK-NEXT: psrad $19, %xmm0
338 ; CHECK-NEXT: packssdw %xmm0, %xmm0
340 %srl = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
341 %trunc = trunc <4 x i32> %srl to <4 x i16>
342 %sra = ashr <4 x i16> %trunc, <i16 3, i16 3, i16 3, i16 3>
; Left shift by 2 on i16 lanes, zero-extension to i32, then left shift by 17.
; Expected to widen first (punpcklwd of %xmm0 with itself) and then perform a
; single pslld by the combined amount (2 + 17 = 19).
346 define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
347 ; CHECK-LABEL: shl_zext_shl_v4i32:
349 ; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
350 ; CHECK-NEXT: pslld $19, %xmm0
352 %shl0 = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
353 %ext = zext <4 x i16> %shl0 to <4 x i32>
354 %shl1 = shl <4 x i32> %ext, <i32 17, i32 17, i32 17, i32 17>
; Single arithmetic right shift of i32 lanes by 3. Expected to select one
; immediate-form psrad.
358 define <4 x i32> @sra_v4i32(<4 x i32> %x) nounwind {
359 ; CHECK-LABEL: sra_v4i32:
361 ; CHECK-NEXT: psrad $3, %xmm0
363 %sra = ashr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
; Single logical right shift of i32 lanes by 3. Expected to select one
; immediate-form psrld.
; Note: the result value is named %sra although the operation is lshr.
367 define <4 x i32> @srl_v4i32(<4 x i32> %x) nounwind {
368 ; CHECK-LABEL: srl_v4i32:
370 ; CHECK-NEXT: psrld $3, %xmm0
372 %sra = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
; Single left shift of i32 lanes by 3. Expected to select one immediate-form
; pslld.
; Note: the result value is named %sra although the operation is shl.
376 define <4 x i32> @shl_v4i32(<4 x i32> %x) nounwind {
377 ; CHECK-LABEL: shl_v4i32:
379 ; CHECK-NEXT: pslld $3, %xmm0
381 %sra = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>