1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; vcombine8: loads two <8 x i8> vectors through %A and %B and joins them into
; one <16 x i8> value with a shufflevector.
; The FileCheck directives below expect the output to contain
; "vmov r0, r1, d16" and then "vmov r2, r3, d17".
; NOTE(review): the function's return and closing brace are not visible in
; this excerpt; presumably %tmp3 is what gets returned -- confirm.
3 define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
5 ; CHECK: vmov r0, r1, d16
6 ; CHECK: vmov r2, r3, d17
7 %tmp1 = load <8 x i8>* %A
8 %tmp2 = load <8 x i8>* %B
; Mask indices 0-7 select the lanes of %tmp1 and 8-15 the lanes of %tmp2, in
; order, so the result is %tmp1 followed by %tmp2.
9 %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; vcombine16: loads two <4 x i16> vectors through %A and %B and joins them
; into one <8 x i16> value with a shufflevector.
; The FileCheck directives below expect the output to contain
; "vmov r0, r1, d16" and then "vmov r2, r3, d17".
; NOTE(review): the function's return and closing brace are not visible in
; this excerpt; presumably %tmp3 is what gets returned -- confirm.
13 define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
15 ; CHECK: vmov r0, r1, d16
16 ; CHECK: vmov r2, r3, d17
17 %tmp1 = load <4 x i16>* %A
18 %tmp2 = load <4 x i16>* %B
; Mask indices 0-3 select the lanes of %tmp1 and 4-7 the lanes of %tmp2, in
; order, so the result is %tmp1 followed by %tmp2.
19 %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; vcombine32: loads two <2 x i32> vectors through %A and %B and joins them
; into one <4 x i32> value with a shufflevector.
; The FileCheck directives below expect the output to contain
; "vmov r0, r1, d16" and then "vmov r2, r3, d17".
; NOTE(review): the function's return and closing brace are not visible in
; this excerpt; presumably %tmp3 is what gets returned -- confirm.
23 define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
25 ; CHECK: vmov r0, r1, d16
26 ; CHECK: vmov r2, r3, d17
27 %tmp1 = load <2 x i32>* %A
28 %tmp2 = load <2 x i32>* %B
; Mask indices 0-1 select the lanes of %tmp1 and 2-3 the lanes of %tmp2, in
; order, so the result is %tmp1 followed by %tmp2.
29 %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; vcombinefloat: loads two <2 x float> vectors through %A and %B and joins
; them into one <4 x float> value with a shufflevector.
; The FileCheck directives below first match the function name in the output,
; then expect "vmov r0, r1, d16" followed by "vmov r2, r3, d17".
; NOTE(review): the function's return and closing brace are not visible in
; this excerpt; presumably %tmp3 is what gets returned -- confirm.
33 define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
34 ; CHECK: vcombinefloat
35 ; CHECK: vmov r0, r1, d16
36 ; CHECK: vmov r2, r3, d17
37 %tmp1 = load <2 x float>* %A
38 %tmp2 = load <2 x float>* %B
; Mask indices 0-1 select the lanes of %tmp1 and 2-3 the lanes of %tmp2, in
; order, so the result is %tmp1 followed by %tmp2.
39 %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; vcombine64: loads two <1 x i64> vectors through %A and %B and joins them
; into one <2 x i64> value with a shufflevector.
; The FileCheck directives below expect the output to contain
; "vmov r0, r1, d16" and then "vmov r2, r3, d17".
; NOTE(review): the function's return and closing brace are not visible in
; this excerpt; presumably %tmp3 is what gets returned -- confirm.
43 define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
45 ; CHECK: vmov r0, r1, d16
46 ; CHECK: vmov r2, r3, d17
47 %tmp1 = load <1 x i64>* %A
48 %tmp2 = load <1 x i64>* %B
; Mask index 0 selects the single lane of %tmp1 and index 1 the single lane
; of %tmp2, so the result is %tmp1 followed by %tmp2.
49 %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
53 ; Check for vget_low and vget_high implemented with shufflevector. PR8411.
54 ; They should not require storing to the stack.
; vget_low16: loads an <8 x i16> vector through %A and extracts its first four
; lanes as a <4 x i16> value with a shufflevector.
; The FileCheck directive below expects "vmov r0, r1, d16" in the output.
; NOTE(review): the function's return and closing brace are not visible in
; this excerpt; presumably %tmp2 is what gets returned -- confirm.
56 define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
59 ; CHECK: vmov r0, r1, d16
60 %tmp1 = load <8 x i16>* %A
; The second operand is undef and the mask only uses indices 0-3, so every
; result lane comes from the first half of %tmp1.
61 %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; vget_high8: loads a <16 x i8> vector through %A and extracts its last eight
; lanes as an <8 x i8> value with a shufflevector.
; The FileCheck directive below expects "vmov r0, r1, d17" in the output.
; NOTE(review): the function's return and closing brace are not visible in
; this excerpt; presumably %tmp2 is what gets returned -- confirm.
65 define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
68 ; CHECK: vmov r0, r1, d17
69 %tmp1 = load <16 x i8>* %A
; The second operand is undef and the mask only uses indices 8-15, which still
; refer to the first operand, so every result lane comes from the second half
; of %tmp1.
70 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>