1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64
; Per-element variable shl of <4 x i32>: both targets are expected to select a
; single vpsllvd on xmm registers.
5 define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
6 ; CHECK-LABEL: variable_shl0:
8 ; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
9 ; CHECK-NEXT: ret{{[l|q]}}
10 %k = shl <4 x i32> %x, %y
; Per-element variable shl of <8 x i32>: expected to select a single vpsllvd on
; ymm registers.
14 define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
15 ; CHECK-LABEL: variable_shl1:
17 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
18 ; CHECK-NEXT: ret{{[l|q]}}
19 %k = shl <8 x i32> %x, %y
; Per-element variable shl of <2 x i64>: expected to select a single vpsllvq on
; xmm registers.
23 define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
24 ; CHECK-LABEL: variable_shl2:
26 ; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
27 ; CHECK-NEXT: ret{{[l|q]}}
28 %k = shl <2 x i64> %x, %y
; Per-element variable shl of <4 x i64>: expected to select a single vpsllvq on
; ymm registers.
32 define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
33 ; CHECK-LABEL: variable_shl3:
35 ; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
36 ; CHECK-NEXT: ret{{[l|q]}}
37 %k = shl <4 x i64> %x, %y
; Per-element variable lshr of <4 x i32>: expected to select a single vpsrlvd on
; xmm registers.
41 define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
42 ; CHECK-LABEL: variable_srl0:
44 ; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
45 ; CHECK-NEXT: ret{{[l|q]}}
46 %k = lshr <4 x i32> %x, %y
; Per-element variable lshr of <8 x i32>: expected to select a single vpsrlvd on
; ymm registers.
50 define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
51 ; CHECK-LABEL: variable_srl1:
53 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
54 ; CHECK-NEXT: ret{{[l|q]}}
55 %k = lshr <8 x i32> %x, %y
; Per-element variable lshr of <2 x i64>: expected to select a single vpsrlvq on
; xmm registers.
59 define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
60 ; CHECK-LABEL: variable_srl2:
62 ; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
63 ; CHECK-NEXT: ret{{[l|q]}}
64 %k = lshr <2 x i64> %x, %y
; Per-element variable lshr of <4 x i64>: expected to select a single vpsrlvq on
; ymm registers.
68 define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
69 ; CHECK-LABEL: variable_srl3:
71 ; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
72 ; CHECK-NEXT: ret{{[l|q]}}
73 %k = lshr <4 x i64> %x, %y
; Per-element variable ashr of <4 x i32>: expected to select a single vpsravd on
; xmm registers.
77 define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
78 ; CHECK-LABEL: variable_sra0:
80 ; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm0
81 ; CHECK-NEXT: ret{{[l|q]}}
82 %k = ashr <4 x i32> %x, %y
; Per-element variable ashr of <8 x i32>: expected to select a single vpsravd on
; ymm registers.
86 define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
87 ; CHECK-LABEL: variable_sra1:
89 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
90 ; CHECK-NEXT: ret{{[l|q]}}
91 %k = ashr <8 x i32> %x, %y
; shl of <8 x i32> by a splat constant 2: expected to use the immediate form
; vpslld $2 on a ymm register.
97 define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
98 ; CHECK-LABEL: vshift00:
100 ; CHECK-NEXT: vpslld $2, %ymm0, %ymm0
101 ; CHECK-NEXT: ret{{[l|q]}}
102 %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; shl of <16 x i16> by a splat constant 2: expected to use the immediate form
; vpsllw $2 on a ymm register.
106 define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
107 ; CHECK-LABEL: vshift01:
109 ; CHECK-NEXT: vpsllw $2, %ymm0, %ymm0
110 ; CHECK-NEXT: ret{{[l|q]}}
111 %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; shl of <4 x i64> by a splat constant 2: expected to use the immediate form
; vpsllq $2 on a ymm register.
115 define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
116 ; CHECK-LABEL: vshift02:
118 ; CHECK-NEXT: vpsllq $2, %ymm0, %ymm0
119 ; CHECK-NEXT: ret{{[l|q]}}
120 %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
124 ;;; Logical shift right
; lshr of <8 x i32> by a splat constant 2: expected to use the immediate form
; vpsrld $2 on a ymm register.
126 define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
127 ; CHECK-LABEL: vshift03:
129 ; CHECK-NEXT: vpsrld $2, %ymm0, %ymm0
130 ; CHECK-NEXT: ret{{[l|q]}}
131 %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; lshr of <16 x i16> by a splat constant 2: expected to use the immediate form
; vpsrlw $2 on a ymm register.
135 define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
136 ; CHECK-LABEL: vshift04:
138 ; CHECK-NEXT: vpsrlw $2, %ymm0, %ymm0
139 ; CHECK-NEXT: ret{{[l|q]}}
140 %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; lshr of <4 x i64> by a splat constant 2: expected to use the immediate form
; vpsrlq $2 on a ymm register.
144 define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
145 ; CHECK-LABEL: vshift05:
147 ; CHECK-NEXT: vpsrlq $2, %ymm0, %ymm0
148 ; CHECK-NEXT: ret{{[l|q]}}
149 %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
153 ;;; Arithmetic shift right
; ashr of <8 x i32> by a splat constant 2: expected to use the immediate form
; vpsrad $2 on a ymm register.
155 define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
156 ; CHECK-LABEL: vshift06:
158 ; CHECK-NEXT: vpsrad $2, %ymm0, %ymm0
159 ; CHECK-NEXT: ret{{[l|q]}}
160 %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; ashr of <16 x i16> by a splat constant 2: expected to use the immediate form
; vpsraw $2 on a ymm register.
164 define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
165 ; CHECK-LABEL: vshift07:
167 ; CHECK-NEXT: vpsraw $2, %ymm0, %ymm0
168 ; CHECK-NEXT: ret{{[l|q]}}
169 %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; ashr of <4 x i32> by a shift-amount vector loaded from %y: the load is
; expected to fold into vpsravd's memory operand. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
173 define <4 x i32> @variable_sra0_load(<4 x i32> %x, ptr %y) {
174 ; X86-LABEL: variable_sra0_load:
176 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
177 ; X86-NEXT: vpsravd (%eax), %xmm0, %xmm0
180 ; X64-LABEL: variable_sra0_load:
182 ; X64-NEXT: vpsravd (%rdi), %xmm0, %xmm0
184 %y1 = load <4 x i32>, ptr %y
185 %k = ashr <4 x i32> %x, %y1
; ashr of <8 x i32> by a shift-amount vector loaded from %y: the load is
; expected to fold into the ymm form of vpsravd. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
189 define <8 x i32> @variable_sra1_load(<8 x i32> %x, ptr %y) {
190 ; X86-LABEL: variable_sra1_load:
192 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
193 ; X86-NEXT: vpsravd (%eax), %ymm0, %ymm0
196 ; X64-LABEL: variable_sra1_load:
198 ; X64-NEXT: vpsravd (%rdi), %ymm0, %ymm0
200 %y1 = load <8 x i32>, ptr %y
201 %k = ashr <8 x i32> %x, %y1
; shl of <4 x i32> by a shift-amount vector loaded from %y: the load is
; expected to fold into vpsllvd's memory operand. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
205 define <4 x i32> @variable_shl0_load(<4 x i32> %x, ptr %y) {
206 ; X86-LABEL: variable_shl0_load:
208 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
209 ; X86-NEXT: vpsllvd (%eax), %xmm0, %xmm0
212 ; X64-LABEL: variable_shl0_load:
214 ; X64-NEXT: vpsllvd (%rdi), %xmm0, %xmm0
216 %y1 = load <4 x i32>, ptr %y
217 %k = shl <4 x i32> %x, %y1
; shl of <8 x i32> by a shift-amount vector loaded from %y: the load is
; expected to fold into the ymm form of vpsllvd. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
221 define <8 x i32> @variable_shl1_load(<8 x i32> %x, ptr %y) {
222 ; X86-LABEL: variable_shl1_load:
224 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
225 ; X86-NEXT: vpsllvd (%eax), %ymm0, %ymm0
228 ; X64-LABEL: variable_shl1_load:
230 ; X64-NEXT: vpsllvd (%rdi), %ymm0, %ymm0
232 %y1 = load <8 x i32>, ptr %y
233 %k = shl <8 x i32> %x, %y1
; shl of <2 x i64> by a shift-amount vector loaded from %y: the load is
; expected to fold into vpsllvq's memory operand. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
237 define <2 x i64> @variable_shl2_load(<2 x i64> %x, ptr %y) {
238 ; X86-LABEL: variable_shl2_load:
240 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
241 ; X86-NEXT: vpsllvq (%eax), %xmm0, %xmm0
244 ; X64-LABEL: variable_shl2_load:
246 ; X64-NEXT: vpsllvq (%rdi), %xmm0, %xmm0
248 %y1 = load <2 x i64>, ptr %y
249 %k = shl <2 x i64> %x, %y1
; shl of <4 x i64> by a shift-amount vector loaded from %y: the load is
; expected to fold into the ymm form of vpsllvq. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
253 define <4 x i64> @variable_shl3_load(<4 x i64> %x, ptr %y) {
254 ; X86-LABEL: variable_shl3_load:
256 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
257 ; X86-NEXT: vpsllvq (%eax), %ymm0, %ymm0
260 ; X64-LABEL: variable_shl3_load:
262 ; X64-NEXT: vpsllvq (%rdi), %ymm0, %ymm0
264 %y1 = load <4 x i64>, ptr %y
265 %k = shl <4 x i64> %x, %y1
; lshr of <4 x i32> by a shift-amount vector loaded from %y: the load is
; expected to fold into vpsrlvd's memory operand. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
269 define <4 x i32> @variable_srl0_load(<4 x i32> %x, ptr %y) {
270 ; X86-LABEL: variable_srl0_load:
272 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
273 ; X86-NEXT: vpsrlvd (%eax), %xmm0, %xmm0
276 ; X64-LABEL: variable_srl0_load:
278 ; X64-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0
280 %y1 = load <4 x i32>, ptr %y
281 %k = lshr <4 x i32> %x, %y1
; lshr of <8 x i32> by a shift-amount vector loaded from %y: the load is
; expected to fold into the ymm form of vpsrlvd. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
285 define <8 x i32> @variable_srl1_load(<8 x i32> %x, ptr %y) {
286 ; X86-LABEL: variable_srl1_load:
288 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
289 ; X86-NEXT: vpsrlvd (%eax), %ymm0, %ymm0
292 ; X64-LABEL: variable_srl1_load:
294 ; X64-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0
296 %y1 = load <8 x i32>, ptr %y
297 %k = lshr <8 x i32> %x, %y1
; lshr of <2 x i64> by a shift-amount vector loaded from %y: the load is
; expected to fold into vpsrlvq's memory operand. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
301 define <2 x i64> @variable_srl2_load(<2 x i64> %x, ptr %y) {
302 ; X86-LABEL: variable_srl2_load:
304 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
305 ; X86-NEXT: vpsrlvq (%eax), %xmm0, %xmm0
308 ; X64-LABEL: variable_srl2_load:
310 ; X64-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0
312 %y1 = load <2 x i64>, ptr %y
313 %k = lshr <2 x i64> %x, %y1
; lshr of <4 x i64> by a shift-amount vector loaded from %y: the load is
; expected to fold into the ymm form of vpsrlvq. On i686 the pointer is first
; read from the stack into %eax; on x86-64 it is addressed through %rdi.
317 define <4 x i64> @variable_srl3_load(<4 x i64> %x, ptr %y) {
318 ; X86-LABEL: variable_srl3_load:
320 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
321 ; X86-NEXT: vpsrlvq (%eax), %ymm0, %ymm0
324 ; X64-LABEL: variable_srl3_load:
326 ; X64-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0
328 %y1 = load <4 x i64>, ptr %y
329 %k = lshr <4 x i64> %x, %y1
; shl of <32 x i8> by a splat constant 3: expected as a 16-bit-lane vpsllw $3
; followed by a vpand with a constant-pool mask. Only the constant-pool symbol
; is matched, not the mask value; the mask presumably clears bits that the
; word-wide shift carried across byte boundaries (TODO confirm).
333 define <32 x i8> @shl9(<32 x i8> %A) nounwind {
336 ; X86-NEXT: vpsllw $3, %ymm0, %ymm0
337 ; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
342 ; X64-NEXT: vpsllw $3, %ymm0, %ymm0
343 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
345 %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; lshr of <32 x i8> by a splat constant 3: expected as a 16-bit-lane vpsrlw $3
; followed by a vpand with a constant-pool mask. Only the constant-pool symbol
; is matched, not the mask value; the mask presumably clears bits that the
; word-wide shift carried across byte boundaries (TODO confirm).
349 define <32 x i8> @shr9(<32 x i8> %A) nounwind {
352 ; X86-NEXT: vpsrlw $3, %ymm0, %ymm0
353 ; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
358 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
359 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
361 %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; ashr of <32 x i8> by a splat constant 7: expected to be emitted without any
; shift instruction, as a signed byte compare against a zeroed register
; (vpxor to produce zero, then vpcmpgtb).
365 define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
366 ; CHECK-LABEL: sra_v32i8_7:
368 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
369 ; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
370 ; CHECK-NEXT: ret{{[l|q]}}
371 %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; ashr of <32 x i8> by a splat constant 3: expected as a logical word shift
; (vpsrlw $3) masked with a constant-pool vpand, then vpxor and vpsubb against
; a broadcast byte of 16. 16 is 0x80 >> 3, i.e. where each byte's sign bit
; lands after the shift, so the xor/sub pair presumably restores the sign
; extension (TODO confirm).
375 define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
376 ; X86-LABEL: sra_v32i8:
378 ; X86-NEXT: vpsrlw $3, %ymm0, %ymm0
379 ; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
380 ; X86-NEXT: vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
381 ; X86-NEXT: vpxor %ymm1, %ymm0, %ymm0
382 ; X86-NEXT: vpsubb %ymm1, %ymm0, %ymm0
385 ; X64-LABEL: sra_v32i8:
387 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
388 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
389 ; X64-NEXT: vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
390 ; X64-NEXT: vpxor %ymm1, %ymm0, %ymm0
391 ; X64-NEXT: vpsubb %ymm1, %ymm0, %ymm0
393 %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; trunc <16 x i16> to <16 x i8> followed by sext back to <16 x i16>: expected
; to stay in the ymm register as a shift pair, vpsllw $8 then vpsraw $8.
397 define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
398 ; CHECK-LABEL: sext_v16i16:
400 ; CHECK-NEXT: vpsllw $8, %ymm0, %ymm0
401 ; CHECK-NEXT: vpsraw $8, %ymm0, %ymm0
402 ; CHECK-NEXT: ret{{[l|q]}}
403 %b = trunc <16 x i16> %a to <16 x i8>
404 %c = sext <16 x i8> %b to <16 x i16>
; trunc <8 x i32> to <8 x i16> followed by sext back to <8 x i32>: expected to
; stay in the ymm register as a shift pair, vpslld $16 then vpsrad $16.
408 define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
409 ; CHECK-LABEL: sext_v8i32:
411 ; CHECK-NEXT: vpslld $16, %ymm0, %ymm0
412 ; CHECK-NEXT: vpsrad $16, %ymm0, %ymm0
413 ; CHECK-NEXT: ret{{[l|q]}}
414 %b = trunc <8 x i32> %a to <8 x i16>
415 %c = sext <8 x i16> %b to <8 x i32>
; Per-element variable shl of <8 x i16>: expected to widen both operands to
; eight 32-bit lanes with vpmovzxwd, shift with vpsllvd, then narrow back by
; selecting the low 16 bits of each lane (vpshufb + vpermq). vzeroupper is
; expected before the return because ymm registers were used.
419 define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
420 ; CHECK-LABEL: variable_shl16:
422 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
423 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
424 ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
425 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
426 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
427 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
428 ; CHECK-NEXT: vzeroupper
429 ; CHECK-NEXT: ret{{[l|q]}}
430 %res = shl <8 x i16> %lhs, %rhs
; Per-element variable ashr of <8 x i16>: expected to zero-extend the shift
; amounts (vpmovzxwd) and sign-extend the value (vpmovsxwd) to 32-bit lanes,
; shift with vpsravd, then narrow back with vextracti128 + vpackssdw.
; vzeroupper is expected before the return because ymm registers were used.
434 define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
435 ; CHECK-LABEL: variable_ashr16:
437 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
438 ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
439 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
440 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
441 ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
442 ; CHECK-NEXT: vzeroupper
443 ; CHECK-NEXT: ret{{[l|q]}}
444 %res = ashr <8 x i16> %lhs, %rhs
; Per-element variable lshr of <8 x i16>: expected to zero-extend both
; operands to 32-bit lanes with vpmovzxwd, shift with vpsrlvd, then narrow
; back with vextracti128 + vpackusdw. vzeroupper is expected before the return
; because ymm registers were used.
448 define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
449 ; CHECK-LABEL: variable_lshr16:
451 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
452 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
453 ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
454 ; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
455 ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
456 ; CHECK-NEXT: vzeroupper
457 ; CHECK-NEXT: ret{{[l|q]}}
458 %res = lshr <8 x i16> %lhs, %rhs