1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; vshift00: shift every i32 lane of a 256-bit vector left by the constant 2.
; This file is compiled with only +avx, and the expected code works on 128-bit
; halves: the low half is shifted into a scratch register, the high half is
; pulled out with vextractf128 and shifted, and vinsertf128 rebuilds the ymm
; result.
5 define <8 x i32> @vshift00(<8 x i32> %a) {
6 ; CHECK-LABEL: vshift00:
8 ; CHECK-NEXT: vpslld $2, %xmm0, %xmm1
9 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
10 ; CHECK-NEXT: vpslld $2, %xmm0, %xmm0
11 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; All eight shift amounts are the same constant, matching the immediate-form
; vpslld expected above.
13 %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; vshift01: shift every i16 lane of a 256-bit vector left by the constant 2.
; Same split/shift/recombine shape as the i32 case, but the expected shift
; instruction is the word-sized vpsllw.
17 define <16 x i16> @vshift01(<16 x i16> %a) {
18 ; CHECK-LABEL: vshift01:
20 ; CHECK-NEXT: vpsllw $2, %xmm0, %xmm1
21 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
22 ; CHECK-NEXT: vpsllw $2, %xmm0, %xmm0
23 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; Splat shift amount of 2 across all sixteen lanes.
25 %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; vshift02: shift every i64 lane of a 256-bit vector left by the constant 2.
; Expected code shifts each 128-bit half with the quadword-sized vpsllq and
; recombines the halves with vinsertf128.
29 define <4 x i64> @vshift02(<4 x i64> %a) {
30 ; CHECK-LABEL: vshift02:
32 ; CHECK-NEXT: vpsllq $2, %xmm0, %xmm1
33 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
34 ; CHECK-NEXT: vpsllq $2, %xmm0, %xmm0
35 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; Splat shift amount of 2 across all four lanes.
37 %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
41 ;;; Logical Shift right
; vshift03: logical (zero-filling) right shift of every i32 lane by the
; constant 2. Expected code uses vpsrld on each 128-bit half and recombines
; the halves with vinsertf128.
42 define <8 x i32> @vshift03(<8 x i32> %a) {
43 ; CHECK-LABEL: vshift03:
45 ; CHECK-NEXT: vpsrld $2, %xmm0, %xmm1
46 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
47 ; CHECK-NEXT: vpsrld $2, %xmm0, %xmm0
48 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; Splat shift amount of 2 across all eight lanes.
50 %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; vshift04: logical right shift of every i16 lane by the constant 2.
; Expected code uses vpsrlw on each 128-bit half and recombines the halves
; with vinsertf128.
54 define <16 x i16> @vshift04(<16 x i16> %a) {
55 ; CHECK-LABEL: vshift04:
57 ; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm1
58 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
59 ; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
60 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; Splat shift amount of 2 across all sixteen lanes.
62 %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; vshift05: logical right shift of every i64 lane by the constant 2.
; Expected code uses vpsrlq on each 128-bit half and recombines the halves
; with vinsertf128.
66 define <4 x i64> @vshift05(<4 x i64> %a) {
67 ; CHECK-LABEL: vshift05:
69 ; CHECK-NEXT: vpsrlq $2, %xmm0, %xmm1
70 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
71 ; CHECK-NEXT: vpsrlq $2, %xmm0, %xmm0
72 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; Splat shift amount of 2 across all four lanes.
74 %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
78 ;;; Arithmetic Shift right
; vshift06: arithmetic (sign-filling) right shift of every i32 lane by the
; constant 2. Expected code uses vpsrad on each 128-bit half and recombines
; the halves with vinsertf128.
79 define <8 x i32> @vshift06(<8 x i32> %a) {
80 ; CHECK-LABEL: vshift06:
82 ; CHECK-NEXT: vpsrad $2, %xmm0, %xmm1
83 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
84 ; CHECK-NEXT: vpsrad $2, %xmm0, %xmm0
85 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; Splat shift amount of 2 across all eight lanes.
87 %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; vshift07: arithmetic right shift of every i16 lane by the constant 2.
; Expected code uses vpsraw on each 128-bit half and recombines the halves
; with vinsertf128.
91 define <16 x i16> @vshift07(<16 x i16> %a) {
92 ; CHECK-LABEL: vshift07:
94 ; CHECK-NEXT: vpsraw $2, %xmm0, %xmm1
95 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
96 ; CHECK-NEXT: vpsraw $2, %xmm0, %xmm0
97 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; Splat shift amount of 2 across all sixteen lanes.
99 %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; vshift09: arithmetic right shift of every i8 lane by the constant 2.
; The expected code contains no byte-granular shift instruction; each 128-bit
; half is handled with this sequence instead:
;   1. vpsrlw $2       - logical shift on 16-bit lanes,
;   2. vpand  with 63  - keep only the low six bits of each byte (0xFF >> 2),
;                        discarding bits that crossed in from the adjacent byte,
;   3. vpxor  with 32  - flip bit 5, where the original sign bit now sits,
;   4. vpsubb with 32  - subtract it back, which sign-extends from bit 5.
; The two mask constants are loaded once and reused for both halves.
103 define <32 x i8> @vshift09(<32 x i8> %a) {
104 ; CHECK-LABEL: vshift09:
106 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
107 ; CHECK-NEXT: vpsrlw $2, %xmm1, %xmm1
108 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
109 ; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
110 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
111 ; CHECK-NEXT: vpxor %xmm3, %xmm1, %xmm1
112 ; CHECK-NEXT: vpsubb %xmm3, %xmm1, %xmm1
113 ; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
114 ; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
115 ; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
116 ; CHECK-NEXT: vpsubb %xmm3, %xmm0, %xmm0
117 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; Splat shift amount of 2 across all thirty-two byte lanes.
119 %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
; vshift10: arithmetic right shift of every i8 lane by 7, which leaves each
; byte as all-ones when the input was negative and zero otherwise.
; Expected code computes that as a signed compare "0 > x" per byte: a zeroed
; register (vpxor of xmm2 with itself) is compared against each 128-bit half
; with vpcmpgtb, and the halves are recombined with vinsertf128.
123 define <32 x i8> @vshift10(<32 x i8> %a) {
124 ; CHECK-LABEL: vshift10:
126 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
127 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
128 ; CHECK-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
129 ; CHECK-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
130 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; Shift amount 7 is bit-width minus one for i8, i.e. a sign-bit broadcast.
132 %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; vshift11: logical right shift of every i8 lane by the constant 2.
; Expected code shifts 16-bit lanes with vpsrlw $2 and then masks each byte
; with 63 (0xFF >> 2) to clear the two high bits that came from the adjacent
; byte. The mask is loaded once and shared by both 128-bit halves.
136 define <32 x i8> @vshift11(<32 x i8> %a) {
137 ; CHECK-LABEL: vshift11:
139 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
140 ; CHECK-NEXT: vpsrlw $2, %xmm1, %xmm1
141 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
142 ; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
143 ; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
144 ; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
145 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; Splat shift amount of 2 across all thirty-two byte lanes.
147 %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
; vshift12: left shift of every i8 lane by the constant 2.
; Expected code shifts 16-bit lanes with vpsllw $2 and then masks each byte
; with 252 (0xFC) to clear the two low bits that came from the adjacent byte.
; The mask is loaded once and shared by both 128-bit halves.
151 define <32 x i8> @vshift12(<32 x i8> %a) {
152 ; CHECK-LABEL: vshift12:
154 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
155 ; CHECK-NEXT: vpsllw $2, %xmm1, %xmm1
156 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
157 ; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
158 ; CHECK-NEXT: vpsllw $2, %xmm0, %xmm0
159 ; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
160 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; Splat shift amount of 2 across all thirty-two byte lanes.
162 %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
166 ;;; Support variable shifts
; vshift08: variable shift, computing (1 << %a) per i32 lane.
; Expected code builds the result through floating point rather than a
; per-lane shift:
;   - vpslld $23 moves each shift amount into the exponent field of an IEEE
;     single,
;   - vpaddd adds 1065353216 (0x3F800000, the bit pattern of 1.0f), giving
;     the float 2^a,
;   - vcvttps2dq converts that float back to an integer.
; The integer steps run on 128-bit halves; the conversion is done once on the
; recombined 256-bit register.
167 define <8 x i32> @vshift08(<8 x i32> %a) {
168 ; CHECK-LABEL: vshift08:
170 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
171 ; CHECK-NEXT: vpslld $23, %xmm1, %xmm1
172 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
173 ; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
174 ; CHECK-NEXT: vpslld $23, %xmm0, %xmm0
175 ; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0
176 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
177 ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; The shifted value is the constant splat of 1; the shift amount is the
; function argument.
179 %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
; vshift08_add: same (1 << %a) variable shift as vshift08, followed by an
; integer add with a second argument %y.
; In the expected code the float conversion (vcvttps2dq) is performed
; separately on each 128-bit half instead of once on the full ymm register.
; The add is also done per half with vpaddd, after extracting the high half
; of %y from ymm1, and vinsertf128 produces the final 256-bit result.
183 define <8 x i32> @vshift08_add(<8 x i32> %a, <8 x i32> %y) {
184 ; CHECK-LABEL: vshift08_add:
186 ; CHECK-NEXT: vpslld $23, %xmm0, %xmm2
187 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
188 ; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2
189 ; CHECK-NEXT: vcvttps2dq %xmm2, %xmm2
190 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
191 ; CHECK-NEXT: vpslld $23, %xmm0, %xmm0
192 ; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0
193 ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
194 ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm3
195 ; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0
196 ; CHECK-NEXT: vpaddd %xmm1, %xmm2, %xmm1
197 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; 1 << %a per lane, then add %y lane-wise.
199 %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
200 %r = add <8 x i32> %bitop, %y
; vshift13: left shift of a 128-bit <4 x i32> by per-lane constants that are
; not all equal (0, 1, 2, 4).
; Expected code is a single vpmulld whose second operand is a RIP-relative
; constant-pool load; the regex matches the LCPI label, with or without a
; leading dot.
; NOTE(review): the pool contents are not visible here; presumably the
; per-lane powers of two <1, 2, 4, 16> - confirm against llc output.
205 define <4 x i32> @vshift13(<4 x i32> %in) {
206 ; CHECK-LABEL: vshift13:
208 ; CHECK-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; Non-uniform constant shift amounts.
210 %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
214 ;;; Uses shifts for sign extension
; sext_v16i16: truncate each i16 lane to i8 and sign-extend it back to i16,
; i.e. sign-extend the low byte of every lane in place.
; Expected code does this with a shift pair per 128-bit half: vpsllw $8 moves
; the low byte to the top of the lane, and vpsraw $8 shifts it back down while
; replicating its sign bit. The halves are recombined with vinsertf128.
215 define <16 x i16> @sext_v16i16(<16 x i16> %a) {
216 ; CHECK-LABEL: sext_v16i16:
218 ; CHECK-NEXT: vpsllw $8, %xmm0, %xmm1
219 ; CHECK-NEXT: vpsraw $8, %xmm1, %xmm1
220 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
221 ; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0
222 ; CHECK-NEXT: vpsraw $8, %xmm0, %xmm0
223 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; Narrow to 8 bits, then widen back with sign extension.
225 %b = trunc <16 x i16> %a to <16 x i8>
226 %c = sext <16 x i8> %b to <16 x i16>
; sext_v8i32: truncate each i32 lane to i16 and sign-extend it back to i32,
; i.e. sign-extend the low 16 bits of every lane in place.
; Expected code does this with a shift pair per 128-bit half: vpslld $16
; followed by vpsrad $16. The halves are recombined with vinsertf128.
230 define <8 x i32> @sext_v8i32(<8 x i32> %a) {
231 ; CHECK-LABEL: sext_v8i32:
233 ; CHECK-NEXT: vpslld $16, %xmm0, %xmm1
234 ; CHECK-NEXT: vpsrad $16, %xmm1, %xmm1
235 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
236 ; CHECK-NEXT: vpslld $16, %xmm0, %xmm0
237 ; CHECK-NEXT: vpsrad $16, %xmm0, %xmm0
238 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; Narrow to 16 bits, then widen back with sign extension.
240 %b = trunc <8 x i32> %a to <8 x i16>
241 %c = sext <8 x i16> %b to <8 x i32>