; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
; Register-amount left shift of <8 x i8>; should select vshl.u8 (D regs).
define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vshls8:
;CHECK: vshl.u8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shl <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}
; Register-amount left shift of <4 x i16>; should select vshl.u16 (D regs).
define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vshls16:
;CHECK: vshl.u16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shl <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}
; Register-amount left shift of <2 x i32>; should select vshl.u32 (D regs).
define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vshls32:
;CHECK: vshl.u32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shl <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}
; Register-amount left shift of <1 x i64>; should select vshl.u64 (D regs).
define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vshls64:
;CHECK: vshl.u64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = shl <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}
; Immediate left shift by 7 (element width - 1); should select vshl.i8.
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshli8:
;CHECK: vshl.i8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <8 x i8> %tmp2
}
; Immediate left shift by 15 (element width - 1); should select vshl.i16.
define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshli16:
;CHECK: vshl.i16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
  ret <4 x i16> %tmp2
}
; Immediate left shift by 31 (element width - 1); should select vshl.i32.
define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshli32:
;CHECK: vshl.i32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
  ret <2 x i32> %tmp2
}
; Immediate left shift by 63 (element width - 1); should select vshl.i64.
define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vshli64:
;CHECK: vshl.i64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = shl <1 x i64> %tmp1, < i64 63 >
  ret <1 x i64> %tmp2
}
; Register-amount left shift of <16 x i8>; should select vshl.u8 (Q regs).
define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vshlQs8:
;CHECK: vshl.u8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shl <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}
; Register-amount left shift of <8 x i16>; should select vshl.u16 (Q regs).
define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vshlQs16:
;CHECK: vshl.u16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shl <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}
; Register-amount left shift of <4 x i32>; should select vshl.u32 (Q regs).
define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vshlQs32:
;CHECK: vshl.u32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shl <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}
; Register-amount left shift of <2 x i64>; should select vshl.u64 (Q regs).
define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vshlQs64:
;CHECK: vshl.u64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = shl <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}
; Immediate left shift by 7; should select vshl.i8 (Q regs).
define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vshlQi8:
;CHECK: vshl.i8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <16 x i8> %tmp2
}
; Immediate left shift by 15; should select vshl.i16 (Q regs).
define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vshlQi16:
;CHECK: vshl.i16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
  ret <8 x i16> %tmp2
}
; Immediate left shift by 31; should select vshl.i32 (Q regs).
define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vshlQi32:
;CHECK: vshl.i32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
  ret <4 x i32> %tmp2
}
; Immediate left shift by 63; should select vshl.i64 (Q regs).
define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vshlQi64:
;CHECK: vshl.i64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
  ret <2 x i64> %tmp2
}
; Register-amount logical shift right: negated amount fed to vshl.u8.
define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vlshru8:
;CHECK: vneg.s8
;CHECK: vshl.u8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = lshr <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}
; Register-amount logical shift right: negated amount fed to vshl.u16.
define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vlshru16:
;CHECK: vneg.s16
;CHECK: vshl.u16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = lshr <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}
; Register-amount logical shift right: negated amount fed to vshl.u32.
define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vlshru32:
;CHECK: vneg.s32
;CHECK: vshl.u32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = lshr <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}
; Register-amount logical shift right on i64: no vneg.s64 exists, so the
; amount is negated with vsub.i64 before vshl.u64.
define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vlshru64:
;CHECK: vsub.i64
;CHECK: vshl.u64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = lshr <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}
; Immediate logical shift right by 7; should select vshr.u8.
define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vlshri8:
;CHECK: vshr.u8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = lshr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <8 x i8> %tmp2
}
; Immediate logical shift right by 15; should select vshr.u16.
define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vlshri16:
;CHECK: vshr.u16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = lshr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
  ret <4 x i16> %tmp2
}
; Immediate logical shift right by 31; should select vshr.u32.
define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vlshri32:
;CHECK: vshr.u32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = lshr <2 x i32> %tmp1, < i32 31, i32 31 >
  ret <2 x i32> %tmp2
}
; Immediate logical shift right by 63; should select vshr.u64.
define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vlshri64:
;CHECK: vshr.u64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = lshr <1 x i64> %tmp1, < i64 63 >
  ret <1 x i64> %tmp2
}
; Register-amount logical shift right (Q regs): vneg.s8 then vshl.u8.
define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vlshrQu8:
;CHECK: vneg.s8
;CHECK: vshl.u8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = lshr <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}
; Register-amount logical shift right (Q regs): vneg.s16 then vshl.u16.
define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vlshrQu16:
;CHECK: vneg.s16
;CHECK: vshl.u16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = lshr <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}
; Register-amount logical shift right (Q regs): vneg.s32 then vshl.u32.
define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vlshrQu32:
;CHECK: vneg.s32
;CHECK: vshl.u32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = lshr <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}
; Register-amount logical shift right on i64 (Q regs): amount negated with
; vsub.i64 (no vneg.s64), then vshl.u64.
define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vlshrQu64:
;CHECK: vsub.i64
;CHECK: vshl.u64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = lshr <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}
; Immediate logical shift right by 7 (Q regs); should select vshr.u8.
define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vlshrQi8:
;CHECK: vshr.u8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = lshr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <16 x i8> %tmp2
}
; Immediate logical shift right by 15 (Q regs); should select vshr.u16.
define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vlshrQi16:
;CHECK: vshr.u16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = lshr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
  ret <8 x i16> %tmp2
}
; Immediate logical shift right by 31 (Q regs); should select vshr.u32.
define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vlshrQi32:
;CHECK: vshr.u32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = lshr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
  ret <4 x i32> %tmp2
}
; Immediate logical shift right by 63 (Q regs); should select vshr.u64.
define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vlshrQi64:
;CHECK: vshr.u64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = lshr <2 x i64> %tmp1, < i64 63, i64 63 >
  ret <2 x i64> %tmp2
}
; Example that requires splitting and expanding a vector shift.
define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
entry:
  %shr = lshr <2 x i64> %val, < i64 2, i64 2 >		; <<2 x i64>> [#uses=1]
  ret <2 x i64> %shr
}
; Register-amount arithmetic shift right: negated amount fed to vshl.s8.
define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vashrs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = ashr <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}
; Register-amount arithmetic shift right: negated amount fed to vshl.s16.
define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vashrs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = ashr <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}
; Register-amount arithmetic shift right: negated amount fed to vshl.s32.
define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vashrs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = ashr <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}
; Register-amount arithmetic shift right on i64: amount negated with
; vsub.i64 (no vneg.s64), then vshl.s64.
define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vashrs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = ashr <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}
; Immediate arithmetic shift right by 7; should select vshr.s8.
define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vashri8:
;CHECK: vshr.s8
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = ashr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <8 x i8> %tmp2
}
; Immediate arithmetic shift right by 15; should select vshr.s16.
define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vashri16:
;CHECK: vshr.s16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = ashr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
  ret <4 x i16> %tmp2
}
; Immediate arithmetic shift right by 31; should select vshr.s32.
define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vashri32:
;CHECK: vshr.s32
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = ashr <2 x i32> %tmp1, < i32 31, i32 31 >
  ret <2 x i32> %tmp2
}
; Immediate arithmetic shift right by 63; should select vshr.s64.
define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vashri64:
;CHECK: vshr.s64
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = ashr <1 x i64> %tmp1, < i64 63 >
  ret <1 x i64> %tmp2
}
; Register-amount arithmetic shift right (Q regs): vneg.s8 then vshl.s8.
define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vashrQs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = ashr <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}
; Register-amount arithmetic shift right (Q regs): vneg.s16 then vshl.s16.
define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vashrQs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = ashr <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}
; Register-amount arithmetic shift right (Q regs): vneg.s32 then vshl.s32.
define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vashrQs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = ashr <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}
; Register-amount arithmetic shift right on i64 (Q regs): amount negated
; with vsub.i64 (no vneg.s64), then vshl.s64.
define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vashrQs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = ashr <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}
; Immediate arithmetic shift right by 7 (Q regs); should select vshr.s8.
define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vashrQi8:
;CHECK: vshr.s8
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = ashr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <16 x i8> %tmp2
}
; Immediate arithmetic shift right by 15 (Q regs); should select vshr.s16.
define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vashrQi16:
;CHECK: vshr.s16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = ashr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
  ret <8 x i16> %tmp2
}
; Immediate arithmetic shift right by 31 (Q regs); should select vshr.s32.
define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vashrQi32:
;CHECK: vshr.s32
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = ashr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
  ret <4 x i32> %tmp2
}
; Immediate arithmetic shift right by 63 (Q regs); should select vshr.s64.
define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vashrQi64:
;CHECK: vshr.s64
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = ashr <2 x i64> %tmp1, < i64 63, i64 63 >
  ret <2 x i64> %tmp2
}