; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s
; Signed 8-bit absolute-difference-and-accumulate: d = a + |b - c| (VABA.S8).
define <8 x i8> @vabas8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabas8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d0, [r0]
; CHECK-NEXT:    vaba.s8 d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <8 x i8>, ptr %A
	%tmp2 = load <8 x i8>, ptr %B
	%tmp3 = load <8 x i8>, ptr %C
	%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
	%tmp5 = add <8 x i8> %tmp1, %tmp4
	ret <8 x i8> %tmp5
}
; Signed 16-bit absolute-difference-and-accumulate (VABA.S16).
define <4 x i16> @vabas16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabas16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d0, [r0]
; CHECK-NEXT:    vaba.s16 d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <4 x i16>, ptr %A
	%tmp2 = load <4 x i16>, ptr %B
	%tmp3 = load <4 x i16>, ptr %C
	%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
	%tmp5 = add <4 x i16> %tmp1, %tmp4
	ret <4 x i16> %tmp5
}
; Signed 32-bit absolute-difference-and-accumulate (VABA.S32).
define <2 x i32> @vabas32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabas32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d0, [r0]
; CHECK-NEXT:    vaba.s32 d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <2 x i32>, ptr %A
	%tmp2 = load <2 x i32>, ptr %B
	%tmp3 = load <2 x i32>, ptr %C
	%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
	%tmp5 = add <2 x i32> %tmp1, %tmp4
	ret <2 x i32> %tmp5
}
; Unsigned 8-bit absolute-difference-and-accumulate (VABA.U8).
define <8 x i8> @vabau8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabau8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d0, [r0]
; CHECK-NEXT:    vaba.u8 d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <8 x i8>, ptr %A
	%tmp2 = load <8 x i8>, ptr %B
	%tmp3 = load <8 x i8>, ptr %C
	%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
	%tmp5 = add <8 x i8> %tmp1, %tmp4
	ret <8 x i8> %tmp5
}
; Unsigned 16-bit absolute-difference-and-accumulate (VABA.U16).
define <4 x i16> @vabau16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabau16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d0, [r0]
; CHECK-NEXT:    vaba.u16 d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <4 x i16>, ptr %A
	%tmp2 = load <4 x i16>, ptr %B
	%tmp3 = load <4 x i16>, ptr %C
	%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
	%tmp5 = add <4 x i16> %tmp1, %tmp4
	ret <4 x i16> %tmp5
}
; Unsigned 32-bit absolute-difference-and-accumulate (VABA.U32).
define <2 x i32> @vabau32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabau32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d0, [r0]
; CHECK-NEXT:    vaba.u32 d0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <2 x i32>, ptr %A
	%tmp2 = load <2 x i32>, ptr %B
	%tmp3 = load <2 x i32>, ptr %C
	%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
	%tmp5 = add <2 x i32> %tmp1, %tmp4
	ret <2 x i32> %tmp5
}
; 128-bit (q-register) signed 8-bit VABA.
define <16 x i8> @vabaQs8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQs8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vaba.s8 q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <16 x i8>, ptr %A
	%tmp2 = load <16 x i8>, ptr %B
	%tmp3 = load <16 x i8>, ptr %C
	%tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
	%tmp5 = add <16 x i8> %tmp1, %tmp4
	ret <16 x i8> %tmp5
}
; 128-bit (q-register) signed 16-bit VABA.
define <8 x i16> @vabaQs16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQs16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vaba.s16 q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <8 x i16>, ptr %A
	%tmp2 = load <8 x i16>, ptr %B
	%tmp3 = load <8 x i16>, ptr %C
	%tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
	%tmp5 = add <8 x i16> %tmp1, %tmp4
	ret <8 x i16> %tmp5
}
; 128-bit (q-register) signed 32-bit VABA.
define <4 x i32> @vabaQs32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQs32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vaba.s32 q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <4 x i32>, ptr %A
	%tmp2 = load <4 x i32>, ptr %B
	%tmp3 = load <4 x i32>, ptr %C
	%tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
	%tmp5 = add <4 x i32> %tmp1, %tmp4
	ret <4 x i32> %tmp5
}
; 128-bit (q-register) unsigned 8-bit VABA.
define <16 x i8> @vabaQu8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vaba.u8 q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <16 x i8>, ptr %A
	%tmp2 = load <16 x i8>, ptr %B
	%tmp3 = load <16 x i8>, ptr %C
	%tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
	%tmp5 = add <16 x i8> %tmp1, %tmp4
	ret <16 x i8> %tmp5
}
; 128-bit (q-register) unsigned 16-bit VABA.
define <8 x i16> @vabaQu16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vaba.u16 q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <8 x i16>, ptr %A
	%tmp2 = load <8 x i16>, ptr %B
	%tmp3 = load <8 x i16>, ptr %C
	%tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
	%tmp5 = add <8 x i16> %tmp1, %tmp4
	ret <8 x i16> %tmp5
}
; 128-bit (q-register) unsigned 32-bit VABA.
define <4 x i32> @vabaQu32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vaba.u32 q0, q9, q8
; CHECK-NEXT:    bx lr
	%tmp1 = load <4 x i32>, ptr %A
	%tmp2 = load <4 x i32>, ptr %B
	%tmp3 = load <4 x i32>, ptr %C
	%tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
	%tmp5 = add <4 x i32> %tmp1, %tmp4
	ret <4 x i32> %tmp5
}
; External declarations for the NEON absolute-difference intrinsics used above.
declare <8 x i8>  @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Signed 8->16-bit widening absolute-difference-and-accumulate (VABAL.S8).
define <8 x i16> @vabals8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabals8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vabal.s8 q0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <8 x i16>, ptr %A
	%tmp2 = load <8 x i8>, ptr %B
	%tmp3 = load <8 x i8>, ptr %C
	%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
	%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
	%tmp6 = add <8 x i16> %tmp1, %tmp5
	ret <8 x i16> %tmp6
}
; Signed 16->32-bit widening absolute-difference-and-accumulate (VABAL.S16).
define <4 x i32> @vabals16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabals16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vabal.s16 q0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <4 x i32>, ptr %A
	%tmp2 = load <4 x i16>, ptr %B
	%tmp3 = load <4 x i16>, ptr %C
	%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
	%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
	%tmp6 = add <4 x i32> %tmp1, %tmp5
	ret <4 x i32> %tmp6
}
; Signed 32->64-bit widening absolute-difference-and-accumulate (VABAL.S32).
define <2 x i64> @vabals32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabals32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vabal.s32 q0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <2 x i64>, ptr %A
	%tmp2 = load <2 x i32>, ptr %B
	%tmp3 = load <2 x i32>, ptr %C
	%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
	%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
	%tmp6 = add <2 x i64> %tmp1, %tmp5
	ret <2 x i64> %tmp6
}
; Unsigned 8->16-bit widening absolute-difference-and-accumulate (VABAL.U8).
define <8 x i16> @vabalu8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabalu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vabal.u8 q0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <8 x i16>, ptr %A
	%tmp2 = load <8 x i8>, ptr %B
	%tmp3 = load <8 x i8>, ptr %C
	%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
	%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
	%tmp6 = add <8 x i16> %tmp1, %tmp5
	ret <8 x i16> %tmp6
}
; Unsigned 16->32-bit widening absolute-difference-and-accumulate (VABAL.U16).
define <4 x i32> @vabalu16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabalu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vabal.u16 q0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <4 x i32>, ptr %A
	%tmp2 = load <4 x i16>, ptr %B
	%tmp3 = load <4 x i16>, ptr %C
	%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
	%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
	%tmp6 = add <4 x i32> %tmp1, %tmp5
	ret <4 x i32> %tmp6
}
; Unsigned 32->64-bit widening absolute-difference-and-accumulate (VABAL.U32).
define <2 x i64> @vabalu32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabalu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-NEXT:    vabal.u32 q0, d17, d16
; CHECK-NEXT:    bx lr
	%tmp1 = load <2 x i64>, ptr %A
	%tmp2 = load <2 x i32>, ptr %B
	%tmp3 = load <2 x i32>, ptr %C
	%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
	%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
	%tmp6 = add <2 x i64> %tmp1, %tmp5
	ret <2 x i64> %tmp6
}