; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+neon | FileCheck %s
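
; Each function computes (A & 3) | (C & ~3), a bit-select with a constant
; mask, and should be matched to a single NEON VBSL/VBIT, with the mask
; materialized by "vmov.iN #0x3" where the element size allows an immediate.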
define <8 x i8> @v_bsli8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 d16, #0x3
; CHECK-NEXT:    vldr d17, [r2]
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vbsl d16, d18, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = load <8 x i8>, ptr %C
  %tmp4 = and <8 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %tmp6 = and <8 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
  %tmp7 = or <8 x i8> %tmp4, %tmp6
  ret <8 x i8> %tmp7
}

define <4 x i16> @v_bsli16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 d16, #0x3
; CHECK-NEXT:    vldr d17, [r2]
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vbsl d16, d18, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = load <4 x i16>, ptr %C
  %tmp4 = and <4 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3>
  %tmp6 = and <4 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4>
  %tmp7 = or <4 x i16> %tmp4, %tmp6
  ret <4 x i16> %tmp7
}

define <2 x i32> @v_bsli32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0x3
; CHECK-NEXT:    vldr d17, [r2]
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vbsl d16, d18, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = load <2 x i32>, ptr %C
  %tmp4 = and <2 x i32> %tmp1, <i32 3, i32 3>
  %tmp6 = and <2 x i32> %tmp3, <i32 -4, i32 -4>
  %tmp7 = or <2 x i32> %tmp4, %tmp6
  ret <2 x i32> %tmp7
}

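; A <1 x i64> mask of 3 cannot be encoded as a vmov immediate (vmov.i64 only
; replicates whole bytes), so the mask is loaded from the constant pool.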
define <1 x i64> @v_bsli64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d17, [r2]
; CHECK-NEXT:    vldr d16, LCPI3_0
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vbsl d16, d18, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = load <1 x i64>, ptr %C
  %tmp4 = and <1 x i64> %tmp1, <i64 3>
  %tmp6 = and <1 x i64> %tmp3, <i64 -4>
  %tmp7 = or <1 x i64> %tmp4, %tmp6
  ret <1 x i64> %tmp7
}

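; For 128-bit vectors the same pattern is selected as VBIT on Q registers,
; with the mask kept in its own register.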
define <16 x i8> @v_bslQi8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
; CHECK-NEXT:    vmov.i8 q9, #0x3
; CHECK-NEXT:    vld1.32 {d20, d21}, [r0]
; CHECK-NEXT:    vbit q8, q10, q9
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = load <16 x i8>, ptr %C
  %tmp4 = and <16 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %tmp6 = and <16 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
  %tmp7 = or <16 x i8> %tmp4, %tmp6
  ret <16 x i8> %tmp7
}

define <8 x i16> @v_bslQi16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
; CHECK-NEXT:    vmov.i16 q9, #0x3
; CHECK-NEXT:    vld1.32 {d20, d21}, [r0]
; CHECK-NEXT:    vbit q8, q10, q9
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = load <8 x i16>, ptr %C
  %tmp4 = and <8 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %tmp6 = and <8 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4>
  %tmp7 = or <8 x i16> %tmp4, %tmp6
  ret <8 x i16> %tmp7
}

define <4 x i32> @v_bslQi32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
; CHECK-NEXT:    vmov.i32 q9, #0x3
; CHECK-NEXT:    vld1.32 {d20, d21}, [r0]
; CHECK-NEXT:    vbit q8, q10, q9
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = load <4 x i32>, ptr %C
  %tmp4 = and <4 x i32> %tmp1, <i32 3, i32 3, i32 3, i32 3>
  %tmp6 = and <4 x i32> %tmp3, <i32 -4, i32 -4, i32 -4, i32 -4>
  %tmp7 = or <4 x i32> %tmp4, %tmp6
  ret <4 x i32> %tmp7
}

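; As with v_bsli64, the <2 x i64> mask is not a legal vmov immediate; it is
; loaded from the constant pool via adr + vld1.64.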
define <2 x i64> @v_bslQi64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    adr r0, LCPI7_0
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT:    vbit q8, q9, q10
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = load <2 x i64>, ptr %C
  %tmp4 = and <2 x i64> %tmp1, <i64 3, i64 3>
  %tmp6 = and <2 x i64> %tmp3, <i64 -4, i64 -4>
  %tmp7 = or <2 x i64> %tmp4, %tmp6
  ret <2 x i64> %tmp7
}