1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; Test sqadd8b: loads one <8 x i8> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.sqadd.v8i8 intrinsic.
3 define <8 x i8> @sqadd8b(ptr %A, ptr %B) nounwind {
6 %tmp1 = load <8 x i8>, ptr %A
7 %tmp2 = load <8 x i8>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
8 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Test sqadd4h: loads one <4 x i16> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.sqadd.v4i16 intrinsic.
12 define <4 x i16> @sqadd4h(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
13 ;CHECK-LABEL: sqadd4h:
15 %tmp1 = load <4 x i16>, ptr %A
16 %tmp2 = load <4 x i16>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
17 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Test sqadd2s: loads one <2 x i32> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.sqadd.v2i32 intrinsic.
21 define <2 x i32> @sqadd2s(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
22 ;CHECK-LABEL: sqadd2s:
24 %tmp1 = load <2 x i32>, ptr %A
25 %tmp2 = load <2 x i32>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
26 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Test uqadd8b: loads one <8 x i8> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.uqadd.v8i8 intrinsic.
30 define <8 x i8> @uqadd8b(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
31 ;CHECK-LABEL: uqadd8b:
33 %tmp1 = load <8 x i8>, ptr %A
34 %tmp2 = load <8 x i8>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
35 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Test uqadd4h: loads one <4 x i16> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.uqadd.v4i16 intrinsic.
39 define <4 x i16> @uqadd4h(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
40 ;CHECK-LABEL: uqadd4h:
42 %tmp1 = load <4 x i16>, ptr %A
43 %tmp2 = load <4 x i16>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
44 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Test uqadd2s: loads one <2 x i32> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.uqadd.v2i32 intrinsic.
48 define <2 x i32> @uqadd2s(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
49 ;CHECK-LABEL: uqadd2s:
51 %tmp1 = load <2 x i32>, ptr %A
52 %tmp2 = load <2 x i32>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
53 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Test sqadd16b: loads one <16 x i8> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.sqadd.v16i8 intrinsic.
57 define <16 x i8> @sqadd16b(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
58 ;CHECK-LABEL: sqadd16b:
60 %tmp1 = load <16 x i8>, ptr %A
61 %tmp2 = load <16 x i8>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
62 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Test sqadd8h: loads one <8 x i16> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.sqadd.v8i16 intrinsic.
66 define <8 x i16> @sqadd8h(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
67 ;CHECK-LABEL: sqadd8h:
69 %tmp1 = load <8 x i16>, ptr %A
70 %tmp2 = load <8 x i16>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
71 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Test sqadd4s: loads one <4 x i32> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.sqadd.v4i32 intrinsic.
75 define <4 x i32> @sqadd4s(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
76 ;CHECK-LABEL: sqadd4s:
78 %tmp1 = load <4 x i32>, ptr %A
79 %tmp2 = load <4 x i32>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
80 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Test sqadd2d: loads one <2 x i64> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.sqadd.v2i64 intrinsic.
84 define <2 x i64> @sqadd2d(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
85 ;CHECK-LABEL: sqadd2d:
87 %tmp1 = load <2 x i64>, ptr %A
88 %tmp2 = load <2 x i64>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
89 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Test uqadd16b: loads one <16 x i8> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.uqadd.v16i8 intrinsic.
93 define <16 x i8> @uqadd16b(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
94 ;CHECK-LABEL: uqadd16b:
96 %tmp1 = load <16 x i8>, ptr %A
97 %tmp2 = load <16 x i8>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
98 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Test uqadd8h: loads one <8 x i16> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.uqadd.v8i16 intrinsic.
102 define <8 x i16> @uqadd8h(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
103 ;CHECK-LABEL: uqadd8h:
105 %tmp1 = load <8 x i16>, ptr %A
106 %tmp2 = load <8 x i16>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
107 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Test uqadd4s: loads one <4 x i32> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.uqadd.v4i32 intrinsic.
111 define <4 x i32> @uqadd4s(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
112 ;CHECK-LABEL: uqadd4s:
114 %tmp1 = load <4 x i32>, ptr %A
115 %tmp2 = load <4 x i32>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
116 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Test uqadd2d: loads one <2 x i64> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.uqadd.v2i64 intrinsic.
120 define <2 x i64> @uqadd2d(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
121 ;CHECK-LABEL: uqadd2d:
123 %tmp1 = load <2 x i64>, ptr %A
124 %tmp2 = load <2 x i64>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
125 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Declarations of the sqadd and uqadd intrinsics called by the tests above.
; Every declaration is marked nounwind readnone.

; sqadd on vector types with a total width of 64 bits.
; NOTE(review): no call to the v1i64 variant appears in the nearby tests;
; confirm whether a test for it is intended.
129 declare <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
130 declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
131 declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
132 declare <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
; uqadd on vector types with a total width of 64 bits.
; NOTE(review): same question for the uqadd v1i64 variant.
134 declare <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
135 declare <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
136 declare <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
137 declare <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
; sqadd on vector types with a total width of 128 bits.
139 declare <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
140 declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
141 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
142 declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; uqadd on vector types with a total width of 128 bits.
144 declare <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
145 declare <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
146 declare <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
147 declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; Test usqadd8b: loads one <8 x i8> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.usqadd.v8i8 intrinsic.
149 define <8 x i8> @usqadd8b(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
150 ;CHECK-LABEL: usqadd8b:
152 %tmp1 = load <8 x i8>, ptr %A
153 %tmp2 = load <8 x i8>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
154 %tmp3 = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Test usqadd4h: loads one <4 x i16> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.usqadd.v4i16 intrinsic.
158 define <4 x i16> @usqadd4h(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
159 ;CHECK-LABEL: usqadd4h:
161 %tmp1 = load <4 x i16>, ptr %A
162 %tmp2 = load <4 x i16>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
163 %tmp3 = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Test usqadd2s: loads one <2 x i32> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.usqadd.v2i32 intrinsic.
167 define <2 x i32> @usqadd2s(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
168 ;CHECK-LABEL: usqadd2s:
170 %tmp1 = load <2 x i32>, ptr %A
171 %tmp2 = load <2 x i32>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
172 %tmp3 = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Test usqadd16b: loads one <16 x i8> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.usqadd.v16i8 intrinsic.
176 define <16 x i8> @usqadd16b(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
177 ;CHECK-LABEL: usqadd16b:
179 %tmp1 = load <16 x i8>, ptr %A
180 %tmp2 = load <16 x i8>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
181 %tmp3 = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Test usqadd8h: loads one <8 x i16> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.usqadd.v8i16 intrinsic.
185 define <8 x i16> @usqadd8h(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
186 ;CHECK-LABEL: usqadd8h:
188 %tmp1 = load <8 x i16>, ptr %A
189 %tmp2 = load <8 x i16>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
190 %tmp3 = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Test usqadd4s: loads one <4 x i32> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.usqadd.v4i32 intrinsic.
194 define <4 x i32> @usqadd4s(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
195 ;CHECK-LABEL: usqadd4s:
197 %tmp1 = load <4 x i32>, ptr %A
198 %tmp2 = load <4 x i32>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
199 %tmp3 = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Test usqadd2d: loads one <2 x i64> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.usqadd.v2i64 intrinsic.
203 define <2 x i64> @usqadd2d(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
204 ;CHECK-LABEL: usqadd2d:
206 %tmp1 = load <2 x i64>, ptr %A
207 %tmp2 = load <2 x i64>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
208 %tmp3 = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Test usqadd_d: scalar i64 form. The two i64 arguments are passed straight
; to @llvm.aarch64.neon.usqadd.i64 without any loads.
212 define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
213 ; CHECK-LABEL: usqadd_d:
; The pattern below expects a usqadd whose first two operands are d registers.
214 ; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
215 %sum = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r)
; Test usqadd_s: scalar i32 form. The two i32 arguments are passed straight
; to @llvm.aarch64.neon.usqadd.i32 without any loads.
219 define i32 @usqadd_s(i32 %l, i32 %r) nounwind {
220 ; CHECK-LABEL: usqadd_s:
; The pattern below expects a usqadd whose first two operands are s registers.
221 ; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
222 %sum = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r)
; Declarations of the usqadd intrinsics called by the usqadd tests.
; Every declaration is marked nounwind readnone.

; Vector types with a total width of 64 bits.
; NOTE(review): no call to the v1i64 variant appears in the nearby usqadd
; tests; confirm whether a test for it is intended.
226 declare <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
227 declare <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
228 declare <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
229 declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
; Scalar i64 and i32 forms.
230 declare i64 @llvm.aarch64.neon.usqadd.i64(i64, i64) nounwind readnone
231 declare i32 @llvm.aarch64.neon.usqadd.i32(i32, i32) nounwind readnone
; Vector types with a total width of 128 bits.
233 declare <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
234 declare <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
235 declare <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
236 declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
; Test suqadd8b: loads one <8 x i8> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.suqadd.v8i8 intrinsic.
238 define <8 x i8> @suqadd8b(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
239 ;CHECK-LABEL: suqadd8b:
241 %tmp1 = load <8 x i8>, ptr %A
242 %tmp2 = load <8 x i8>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
243 %tmp3 = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Test suqadd4h: loads one <4 x i16> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.suqadd.v4i16 intrinsic.
247 define <4 x i16> @suqadd4h(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
248 ;CHECK-LABEL: suqadd4h:
250 %tmp1 = load <4 x i16>, ptr %A
251 %tmp2 = load <4 x i16>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
252 %tmp3 = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Test suqadd2s: loads one <2 x i32> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.suqadd.v2i32 intrinsic.
256 define <2 x i32> @suqadd2s(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
257 ;CHECK-LABEL: suqadd2s:
259 %tmp1 = load <2 x i32>, ptr %A
260 %tmp2 = load <2 x i32>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
261 %tmp3 = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Test suqadd16b: loads one <16 x i8> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.suqadd.v16i8 intrinsic.
265 define <16 x i8> @suqadd16b(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
266 ;CHECK-LABEL: suqadd16b:
268 %tmp1 = load <16 x i8>, ptr %A
269 %tmp2 = load <16 x i8>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
270 %tmp3 = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Test suqadd8h: loads one <8 x i16> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.suqadd.v8i16 intrinsic.
274 define <8 x i16> @suqadd8h(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
275 ;CHECK-LABEL: suqadd8h:
277 %tmp1 = load <8 x i16>, ptr %A
278 %tmp2 = load <8 x i16>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
279 %tmp3 = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Test suqadd4s: loads one <4 x i32> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.suqadd.v4i32 intrinsic.
283 define <4 x i32> @suqadd4s(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
284 ;CHECK-LABEL: suqadd4s:
286 %tmp1 = load <4 x i32>, ptr %A
287 %tmp2 = load <4 x i32>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
288 %tmp3 = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Test suqadd2d: loads one <2 x i64> vector from %A and one from %B, then
; passes both to the @llvm.aarch64.neon.suqadd.v2i64 intrinsic.
292 define <2 x i64> @suqadd2d(ptr %A, ptr %B) nounwind {
; The label directive below anchors matching at this function's output.
293 ;CHECK-LABEL: suqadd2d:
295 %tmp1 = load <2 x i64>, ptr %A
296 %tmp2 = load <2 x i64>, ptr %B
; %tmp1 (from %A) is the first operand, %tmp2 (from %B) the second.
297 %tmp3 = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
; Test suqadd_1d: single-element vector form. The two <1 x i64> arguments are
; passed straight to @llvm.aarch64.neon.suqadd.v1i64 without any loads.
301 define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind {
302 ; CHECK-LABEL: suqadd_1d:
; The pattern below expects a suqadd whose first two operands are d registers.
303 ; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
304 %sum = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r)
; Test suqadd_d: scalar i64 form. The two i64 arguments are passed straight
; to @llvm.aarch64.neon.suqadd.i64 without any loads.
308 define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
309 ; CHECK-LABEL: suqadd_d:
; The pattern below expects a suqadd whose first two operands are d registers.
310 ; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
311 %sum = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r)
; Test suqadd_s: scalar i32 form. The two i32 arguments are passed straight
; to @llvm.aarch64.neon.suqadd.i32 without any loads.
315 define i32 @suqadd_s(i32 %l, i32 %r) nounwind {
316 ; CHECK-LABEL: suqadd_s:
; The pattern below expects a suqadd whose first two operands are s registers.
317 ; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
318 %sum = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r)
; Declarations of the suqadd intrinsics called by the suqadd tests.
; Every declaration is marked nounwind readnone.

; Vector types with a total width of 64 bits (v1i64 is used by suqadd_1d).
322 declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
323 declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
324 declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
325 declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
; Scalar i64 and i32 forms (used by suqadd_d and suqadd_s).
326 declare i64 @llvm.aarch64.neon.suqadd.i64(i64, i64) nounwind readnone
327 declare i32 @llvm.aarch64.neon.suqadd.i32(i32, i32) nounwind readnone
; Vector types with a total width of 128 bits.
329 declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
330 declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
331 declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
332 declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone