1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
4 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
7 target triple = "aarch64-unknown-linux-gnu"
; add_v4i8: load <4 x i8> from %a and %b, add element-wise, store the sum to %a.
; Default-prefix assertions expect a predicated SVE sequence on .h lanes
; (ptrue vl4, two ld1b, add, st1b). The run without +sve/+sme expects four
; scalar ldrb/add/strb groups operating directly on [x0]/[x1], with no stack use.
9 define void @add_v4i8(ptr %a, ptr %b) {
10 ; CHECK-LABEL: add_v4i8:
12 ; CHECK-NEXT: ptrue p0.h, vl4
13 ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
14 ; CHECK-NEXT: ld1b { z1.h }, p0/z, [x1]
15 ; CHECK-NEXT: add z0.h, z0.h, z1.h
16 ; CHECK-NEXT: st1b { z0.h }, p0, [x0]
19 ; NONEON-NOSVE-LABEL: add_v4i8:
20 ; NONEON-NOSVE: // %bb.0:
21 ; NONEON-NOSVE-NEXT: ldrb w8, [x0, #3]
22 ; NONEON-NOSVE-NEXT: ldrb w9, [x1, #3]
23 ; NONEON-NOSVE-NEXT: ldrb w10, [x0, #2]
24 ; NONEON-NOSVE-NEXT: ldrb w11, [x0, #1]
25 ; NONEON-NOSVE-NEXT: ldrb w12, [x1, #2]
26 ; NONEON-NOSVE-NEXT: ldrb w13, [x0]
27 ; NONEON-NOSVE-NEXT: add w8, w8, w9
28 ; NONEON-NOSVE-NEXT: ldrb w14, [x1, #1]
29 ; NONEON-NOSVE-NEXT: ldrb w9, [x1]
30 ; NONEON-NOSVE-NEXT: add w10, w10, w12
31 ; NONEON-NOSVE-NEXT: strb w8, [x0, #3]
32 ; NONEON-NOSVE-NEXT: add w8, w11, w14
33 ; NONEON-NOSVE-NEXT: add w9, w13, w9
34 ; NONEON-NOSVE-NEXT: strb w10, [x0, #2]
35 ; NONEON-NOSVE-NEXT: strb w8, [x0, #1]
36 ; NONEON-NOSVE-NEXT: strb w9, [x0]
37 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
38 %op1 = load <4 x i8>, ptr %a
39 %op2 = load <4 x i8>, ptr %b
40 %res = add <4 x i8> %op1, %op2
41 store <4 x i8> %res, ptr %a
; add_v8i8: load <8 x i8> from %a and %b, add element-wise, store the sum to %a.
; Default-prefix assertions expect both operands loaded as d registers and a
; single unpredicated "add z0.b". The run without +sve/+sme expects both
; operands spilled to a 32-byte stack frame (%a at sp+8, %b at sp+16), eight
; ldrb/add/strb groups writing the result at sp+24..31, then a d-register
; reload and store to [x0].
45 define void @add_v8i8(ptr %a, ptr %b) {
46 ; CHECK-LABEL: add_v8i8:
48 ; CHECK-NEXT: ldr d0, [x0]
49 ; CHECK-NEXT: ldr d1, [x1]
50 ; CHECK-NEXT: add z0.b, z0.b, z1.b
51 ; CHECK-NEXT: str d0, [x0]
54 ; NONEON-NOSVE-LABEL: add_v8i8:
55 ; NONEON-NOSVE: // %bb.0:
56 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
57 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
58 ; NONEON-NOSVE-NEXT: ldr d0, [x1]
59 ; NONEON-NOSVE-NEXT: ldr d1, [x0]
60 ; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #8]
61 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
62 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
63 ; NONEON-NOSVE-NEXT: add w8, w9, w8
64 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
65 ; NONEON-NOSVE-NEXT: strb w8, [sp, #31]
66 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
67 ; NONEON-NOSVE-NEXT: add w8, w9, w8
68 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
69 ; NONEON-NOSVE-NEXT: strb w8, [sp, #30]
70 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
71 ; NONEON-NOSVE-NEXT: add w8, w9, w8
72 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
73 ; NONEON-NOSVE-NEXT: strb w8, [sp, #29]
74 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
75 ; NONEON-NOSVE-NEXT: add w8, w9, w8
76 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
77 ; NONEON-NOSVE-NEXT: strb w8, [sp, #28]
78 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
79 ; NONEON-NOSVE-NEXT: add w8, w9, w8
80 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
81 ; NONEON-NOSVE-NEXT: strb w8, [sp, #27]
82 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
83 ; NONEON-NOSVE-NEXT: add w8, w9, w8
84 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
85 ; NONEON-NOSVE-NEXT: strb w8, [sp, #26]
86 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
87 ; NONEON-NOSVE-NEXT: add w8, w9, w8
88 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
89 ; NONEON-NOSVE-NEXT: strb w8, [sp, #25]
90 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
91 ; NONEON-NOSVE-NEXT: add w8, w9, w8
92 ; NONEON-NOSVE-NEXT: strb w8, [sp, #24]
93 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
94 ; NONEON-NOSVE-NEXT: str d0, [x0]
95 ; NONEON-NOSVE-NEXT: add sp, sp, #32
96 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
97 %op1 = load <8 x i8>, ptr %a
98 %op2 = load <8 x i8>, ptr %b
99 %res = add <8 x i8> %op1, %op2
100 store <8 x i8> %res, ptr %a
; add_v16i8: load <16 x i8> from %a and %b, add element-wise, store the sum to %a.
; Default-prefix assertions expect both operands loaded as q registers and a
; single unpredicated "add z0.b". The run without +sve/+sme expects the
; operands pushed with a pre-indexed stp (48-byte frame: %a at sp+0, %b at
; sp+16), sixteen ldrb/add/strb groups writing the result at sp+32..47, then a
; q-register reload and store to [x0].
104 define void @add_v16i8(ptr %a, ptr %b) {
105 ; CHECK-LABEL: add_v16i8:
107 ; CHECK-NEXT: ldr q0, [x0]
108 ; CHECK-NEXT: ldr q1, [x1]
109 ; CHECK-NEXT: add z0.b, z0.b, z1.b
110 ; CHECK-NEXT: str q0, [x0]
113 ; NONEON-NOSVE-LABEL: add_v16i8:
114 ; NONEON-NOSVE: // %bb.0:
115 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
116 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
117 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
118 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
119 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
120 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
121 ; NONEON-NOSVE-NEXT: add w8, w9, w8
122 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
123 ; NONEON-NOSVE-NEXT: strb w8, [sp, #47]
124 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
125 ; NONEON-NOSVE-NEXT: add w8, w9, w8
126 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
127 ; NONEON-NOSVE-NEXT: strb w8, [sp, #46]
128 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
129 ; NONEON-NOSVE-NEXT: add w8, w9, w8
130 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
131 ; NONEON-NOSVE-NEXT: strb w8, [sp, #45]
132 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
133 ; NONEON-NOSVE-NEXT: add w8, w9, w8
134 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
135 ; NONEON-NOSVE-NEXT: strb w8, [sp, #44]
136 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
137 ; NONEON-NOSVE-NEXT: add w8, w9, w8
138 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
139 ; NONEON-NOSVE-NEXT: strb w8, [sp, #43]
140 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
141 ; NONEON-NOSVE-NEXT: add w8, w9, w8
142 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
143 ; NONEON-NOSVE-NEXT: strb w8, [sp, #42]
144 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
145 ; NONEON-NOSVE-NEXT: add w8, w9, w8
146 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
147 ; NONEON-NOSVE-NEXT: strb w8, [sp, #41]
148 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
149 ; NONEON-NOSVE-NEXT: add w8, w9, w8
150 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
151 ; NONEON-NOSVE-NEXT: strb w8, [sp, #40]
152 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
153 ; NONEON-NOSVE-NEXT: add w8, w9, w8
154 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
155 ; NONEON-NOSVE-NEXT: strb w8, [sp, #39]
156 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
157 ; NONEON-NOSVE-NEXT: add w8, w9, w8
158 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
159 ; NONEON-NOSVE-NEXT: strb w8, [sp, #38]
160 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
161 ; NONEON-NOSVE-NEXT: add w8, w9, w8
162 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
163 ; NONEON-NOSVE-NEXT: strb w8, [sp, #37]
164 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
165 ; NONEON-NOSVE-NEXT: add w8, w9, w8
166 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
167 ; NONEON-NOSVE-NEXT: strb w8, [sp, #36]
168 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
169 ; NONEON-NOSVE-NEXT: add w8, w9, w8
170 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2]
171 ; NONEON-NOSVE-NEXT: strb w8, [sp, #35]
172 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
173 ; NONEON-NOSVE-NEXT: add w8, w9, w8
174 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
175 ; NONEON-NOSVE-NEXT: strb w8, [sp, #34]
176 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
177 ; NONEON-NOSVE-NEXT: add w8, w9, w8
178 ; NONEON-NOSVE-NEXT: ldrb w9, [sp]
179 ; NONEON-NOSVE-NEXT: strb w8, [sp, #33]
180 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
181 ; NONEON-NOSVE-NEXT: add w8, w9, w8
182 ; NONEON-NOSVE-NEXT: strb w8, [sp, #32]
183 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
184 ; NONEON-NOSVE-NEXT: str q0, [x0]
185 ; NONEON-NOSVE-NEXT: add sp, sp, #48
186 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
187 %op1 = load <16 x i8>, ptr %a
188 %op2 = load <16 x i8>, ptr %b
189 %res = add <16 x i8> %op1, %op2
190 store <16 x i8> %res, ptr %a
; add_v32i8: load <32 x i8> from %a and %b, add element-wise, store the sum to %a.
; Default-prefix assertions expect the 32-byte vectors handled as two q-register
; halves (ldp pairs, two unpredicated "add z.b", one stp). The run without
; +sve/+sme expects a 96-byte frame: low halves at sp+0 (%a) / sp+16 (%b), high
; halves at sp+32 (%a) / sp+48 (%b), thirty-two ldrb/add/strb groups writing the
; result at sp+64..95, then an ldp/stp pair copying it to [x0].
194 define void @add_v32i8(ptr %a, ptr %b) {
195 ; CHECK-LABEL: add_v32i8:
197 ; CHECK-NEXT: ldp q0, q3, [x1]
198 ; CHECK-NEXT: ldp q1, q2, [x0]
199 ; CHECK-NEXT: add z0.b, z1.b, z0.b
200 ; CHECK-NEXT: add z1.b, z2.b, z3.b
201 ; CHECK-NEXT: stp q0, q1, [x0]
204 ; NONEON-NOSVE-LABEL: add_v32i8:
205 ; NONEON-NOSVE: // %bb.0:
206 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
207 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
208 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
209 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
210 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
211 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
212 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #63]
213 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #47]
214 ; NONEON-NOSVE-NEXT: add w8, w9, w8
215 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #46]
216 ; NONEON-NOSVE-NEXT: strb w8, [sp, #95]
217 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #62]
218 ; NONEON-NOSVE-NEXT: add w8, w9, w8
219 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #45]
220 ; NONEON-NOSVE-NEXT: strb w8, [sp, #94]
221 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61]
222 ; NONEON-NOSVE-NEXT: add w8, w9, w8
223 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #44]
224 ; NONEON-NOSVE-NEXT: strb w8, [sp, #93]
225 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #60]
226 ; NONEON-NOSVE-NEXT: add w8, w9, w8
227 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #43]
228 ; NONEON-NOSVE-NEXT: strb w8, [sp, #92]
229 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #59]
230 ; NONEON-NOSVE-NEXT: add w8, w9, w8
231 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #42]
232 ; NONEON-NOSVE-NEXT: strb w8, [sp, #91]
233 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #58]
234 ; NONEON-NOSVE-NEXT: add w8, w9, w8
235 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #41]
236 ; NONEON-NOSVE-NEXT: strb w8, [sp, #90]
237 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57]
238 ; NONEON-NOSVE-NEXT: add w8, w9, w8
239 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #40]
240 ; NONEON-NOSVE-NEXT: strb w8, [sp, #89]
241 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #56]
242 ; NONEON-NOSVE-NEXT: add w8, w9, w8
243 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #39]
244 ; NONEON-NOSVE-NEXT: strb w8, [sp, #88]
245 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #55]
246 ; NONEON-NOSVE-NEXT: add w8, w9, w8
247 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #38]
248 ; NONEON-NOSVE-NEXT: strb w8, [sp, #87]
249 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #54]
250 ; NONEON-NOSVE-NEXT: add w8, w9, w8
251 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #37]
252 ; NONEON-NOSVE-NEXT: strb w8, [sp, #86]
253 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53]
254 ; NONEON-NOSVE-NEXT: add w8, w9, w8
255 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #36]
256 ; NONEON-NOSVE-NEXT: strb w8, [sp, #85]
257 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #52]
258 ; NONEON-NOSVE-NEXT: add w8, w9, w8
259 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #35]
260 ; NONEON-NOSVE-NEXT: strb w8, [sp, #84]
261 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #51]
262 ; NONEON-NOSVE-NEXT: add w8, w9, w8
263 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #34]
264 ; NONEON-NOSVE-NEXT: strb w8, [sp, #83]
265 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #50]
266 ; NONEON-NOSVE-NEXT: add w8, w9, w8
267 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #33]
268 ; NONEON-NOSVE-NEXT: strb w8, [sp, #82]
269 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49]
270 ; NONEON-NOSVE-NEXT: add w8, w9, w8
271 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #32]
272 ; NONEON-NOSVE-NEXT: strb w8, [sp, #81]
273 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #48]
274 ; NONEON-NOSVE-NEXT: add w8, w9, w8
275 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
276 ; NONEON-NOSVE-NEXT: strb w8, [sp, #80]
277 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
278 ; NONEON-NOSVE-NEXT: add w8, w9, w8
279 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #14]
280 ; NONEON-NOSVE-NEXT: strb w8, [sp, #79]
281 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #30]
282 ; NONEON-NOSVE-NEXT: add w8, w9, w8
283 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
284 ; NONEON-NOSVE-NEXT: strb w8, [sp, #78]
285 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
286 ; NONEON-NOSVE-NEXT: add w8, w9, w8
287 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #12]
288 ; NONEON-NOSVE-NEXT: strb w8, [sp, #77]
289 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #28]
290 ; NONEON-NOSVE-NEXT: add w8, w9, w8
291 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
292 ; NONEON-NOSVE-NEXT: strb w8, [sp, #76]
293 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
294 ; NONEON-NOSVE-NEXT: add w8, w9, w8
295 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #10]
296 ; NONEON-NOSVE-NEXT: strb w8, [sp, #75]
297 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #26]
298 ; NONEON-NOSVE-NEXT: add w8, w9, w8
299 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
300 ; NONEON-NOSVE-NEXT: strb w8, [sp, #74]
301 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
302 ; NONEON-NOSVE-NEXT: add w8, w9, w8
303 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
304 ; NONEON-NOSVE-NEXT: strb w8, [sp, #73]
305 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #24]
306 ; NONEON-NOSVE-NEXT: add w8, w9, w8
307 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
308 ; NONEON-NOSVE-NEXT: strb w8, [sp, #72]
309 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
310 ; NONEON-NOSVE-NEXT: add w8, w9, w8
311 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
312 ; NONEON-NOSVE-NEXT: strb w8, [sp, #71]
313 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #22]
314 ; NONEON-NOSVE-NEXT: add w8, w9, w8
315 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
316 ; NONEON-NOSVE-NEXT: strb w8, [sp, #70]
317 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
318 ; NONEON-NOSVE-NEXT: add w8, w9, w8
319 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #4]
320 ; NONEON-NOSVE-NEXT: strb w8, [sp, #69]
321 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #20]
322 ; NONEON-NOSVE-NEXT: add w8, w9, w8
323 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
324 ; NONEON-NOSVE-NEXT: strb w8, [sp, #68]
325 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
326 ; NONEON-NOSVE-NEXT: add w8, w9, w8
327 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #2]
328 ; NONEON-NOSVE-NEXT: strb w8, [sp, #67]
329 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #18]
330 ; NONEON-NOSVE-NEXT: add w8, w9, w8
331 ; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
332 ; NONEON-NOSVE-NEXT: strb w8, [sp, #66]
333 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
334 ; NONEON-NOSVE-NEXT: add w8, w9, w8
335 ; NONEON-NOSVE-NEXT: ldrb w9, [sp]
336 ; NONEON-NOSVE-NEXT: strb w8, [sp, #65]
337 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #16]
338 ; NONEON-NOSVE-NEXT: add w8, w9, w8
339 ; NONEON-NOSVE-NEXT: strb w8, [sp, #64]
340 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
341 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
342 ; NONEON-NOSVE-NEXT: add sp, sp, #96
343 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
344 %op1 = load <32 x i8>, ptr %a
345 %op2 = load <32 x i8>, ptr %b
346 %res = add <32 x i8> %op1, %op2
347 store <32 x i8> %res, ptr %a
; add_v2i16: load <2 x i16> from %a and %b, add element-wise, store the sum to %a.
; The %c parameter is not referenced by the body.
; Default-prefix assertions expect a predicated SVE sequence on .s lanes
; (ptrue vl2, two ld1h, add, st1h). The run without +sve/+sme expects two
; scalar ldrh/add/strh groups operating directly on [x0]/[x1], with no stack use.
351 define void @add_v2i16(ptr %a, ptr %b, ptr %c) {
352 ; CHECK-LABEL: add_v2i16:
354 ; CHECK-NEXT: ptrue p0.s, vl2
355 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
356 ; CHECK-NEXT: ld1h { z1.s }, p0/z, [x1]
357 ; CHECK-NEXT: add z0.s, z0.s, z1.s
358 ; CHECK-NEXT: st1h { z0.s }, p0, [x0]
361 ; NONEON-NOSVE-LABEL: add_v2i16:
362 ; NONEON-NOSVE: // %bb.0:
363 ; NONEON-NOSVE-NEXT: ldrh w8, [x0]
364 ; NONEON-NOSVE-NEXT: ldrh w9, [x1]
365 ; NONEON-NOSVE-NEXT: ldrh w10, [x0, #2]
366 ; NONEON-NOSVE-NEXT: ldrh w11, [x1, #2]
367 ; NONEON-NOSVE-NEXT: add w8, w8, w9
368 ; NONEON-NOSVE-NEXT: add w9, w10, w11
369 ; NONEON-NOSVE-NEXT: strh w8, [x0]
370 ; NONEON-NOSVE-NEXT: strh w9, [x0, #2]
371 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
372 %op1 = load <2 x i16>, ptr %a
373 %op2 = load <2 x i16>, ptr %b
374 %res = add <2 x i16> %op1, %op2
375 store <2 x i16> %res, ptr %a
; add_v4i16: load <4 x i16> from %a and %b, add element-wise, store the sum to %a.
; The %c parameter is not referenced by the body.
; Default-prefix assertions expect both operands loaded as d registers and a
; single unpredicated "add z0.h". The run without +sve/+sme expects both
; operands spilled to a 32-byte stack frame (%a at sp+8, %b at sp+16), four
; ldrh/add/strh groups writing the result at sp+24..31, then a d-register
; reload and store to [x0].
379 define void @add_v4i16(ptr %a, ptr %b, ptr %c) {
380 ; CHECK-LABEL: add_v4i16:
382 ; CHECK-NEXT: ldr d0, [x0]
383 ; CHECK-NEXT: ldr d1, [x1]
384 ; CHECK-NEXT: add z0.h, z0.h, z1.h
385 ; CHECK-NEXT: str d0, [x0]
388 ; NONEON-NOSVE-LABEL: add_v4i16:
389 ; NONEON-NOSVE: // %bb.0:
390 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
391 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
392 ; NONEON-NOSVE-NEXT: ldr d0, [x1]
393 ; NONEON-NOSVE-NEXT: ldr d1, [x0]
394 ; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #8]
395 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
396 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
397 ; NONEON-NOSVE-NEXT: add w8, w9, w8
398 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12]
399 ; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
400 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
401 ; NONEON-NOSVE-NEXT: add w8, w9, w8
402 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
403 ; NONEON-NOSVE-NEXT: strh w8, [sp, #28]
404 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
405 ; NONEON-NOSVE-NEXT: add w8, w9, w8
406 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8]
407 ; NONEON-NOSVE-NEXT: strh w8, [sp, #26]
408 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
409 ; NONEON-NOSVE-NEXT: add w8, w9, w8
410 ; NONEON-NOSVE-NEXT: strh w8, [sp, #24]
411 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
412 ; NONEON-NOSVE-NEXT: str d0, [x0]
413 ; NONEON-NOSVE-NEXT: add sp, sp, #32
414 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
415 %op1 = load <4 x i16>, ptr %a
416 %op2 = load <4 x i16>, ptr %b
417 %res = add <4 x i16> %op1, %op2
418 store <4 x i16> %res, ptr %a
; add_v8i16: load <8 x i16> from %a and %b, add element-wise, store the sum to %a.
; The %c parameter is not referenced by the body.
; Default-prefix assertions expect both operands loaded as q registers and a
; single unpredicated "add z0.h". The run without +sve/+sme expects the
; operands pushed with a pre-indexed stp (48-byte frame: %a at sp+0, %b at
; sp+16), eight ldrh/add/strh groups writing the result at sp+32..47, then a
; q-register reload and store to [x0].
422 define void @add_v8i16(ptr %a, ptr %b, ptr %c) {
423 ; CHECK-LABEL: add_v8i16:
425 ; CHECK-NEXT: ldr q0, [x0]
426 ; CHECK-NEXT: ldr q1, [x1]
427 ; CHECK-NEXT: add z0.h, z0.h, z1.h
428 ; CHECK-NEXT: str q0, [x0]
431 ; NONEON-NOSVE-LABEL: add_v8i16:
432 ; NONEON-NOSVE: // %bb.0:
433 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
434 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
435 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
436 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
437 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
438 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
439 ; NONEON-NOSVE-NEXT: add w8, w9, w8
440 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12]
441 ; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
442 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
443 ; NONEON-NOSVE-NEXT: add w8, w9, w8
444 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
445 ; NONEON-NOSVE-NEXT: strh w8, [sp, #44]
446 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
447 ; NONEON-NOSVE-NEXT: add w8, w9, w8
448 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8]
449 ; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
450 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
451 ; NONEON-NOSVE-NEXT: add w8, w9, w8
452 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6]
453 ; NONEON-NOSVE-NEXT: strh w8, [sp, #40]
454 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
455 ; NONEON-NOSVE-NEXT: add w8, w9, w8
456 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4]
457 ; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
458 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
459 ; NONEON-NOSVE-NEXT: add w8, w9, w8
460 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2]
461 ; NONEON-NOSVE-NEXT: strh w8, [sp, #36]
462 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
463 ; NONEON-NOSVE-NEXT: add w8, w9, w8
464 ; NONEON-NOSVE-NEXT: ldrh w9, [sp]
465 ; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
466 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
467 ; NONEON-NOSVE-NEXT: add w8, w9, w8
468 ; NONEON-NOSVE-NEXT: strh w8, [sp, #32]
469 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
470 ; NONEON-NOSVE-NEXT: str q0, [x0]
471 ; NONEON-NOSVE-NEXT: add sp, sp, #48
472 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
473 %op1 = load <8 x i16>, ptr %a
474 %op2 = load <8 x i16>, ptr %b
475 %res = add <8 x i16> %op1, %op2
476 store <8 x i16> %res, ptr %a
; add_v16i16: load <16 x i16> from %a and %b, add element-wise, store the sum to %a.
; The %c parameter is not referenced by the body.
; Default-prefix assertions expect the 32-byte vectors handled as two q-register
; halves (ldp pairs, two unpredicated "add z.h", one stp). The run without
; +sve/+sme expects a 96-byte frame: low halves at sp+0 (%a) / sp+16 (%b), high
; halves at sp+32 (%a) / sp+48 (%b), sixteen ldrh/add/strh groups writing the
; result at sp+64..95, then an ldp/stp pair copying it to [x0].
480 define void @add_v16i16(ptr %a, ptr %b, ptr %c) {
481 ; CHECK-LABEL: add_v16i16:
483 ; CHECK-NEXT: ldp q0, q3, [x1]
484 ; CHECK-NEXT: ldp q1, q2, [x0]
485 ; CHECK-NEXT: add z0.h, z1.h, z0.h
486 ; CHECK-NEXT: add z1.h, z2.h, z3.h
487 ; CHECK-NEXT: stp q0, q1, [x0]
490 ; NONEON-NOSVE-LABEL: add_v16i16:
491 ; NONEON-NOSVE: // %bb.0:
492 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
493 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
494 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
495 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
496 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
497 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
498 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #62]
499 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #46]
500 ; NONEON-NOSVE-NEXT: add w8, w9, w8
501 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #44]
502 ; NONEON-NOSVE-NEXT: strh w8, [sp, #94]
503 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #60]
504 ; NONEON-NOSVE-NEXT: add w8, w9, w8
505 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42]
506 ; NONEON-NOSVE-NEXT: strh w8, [sp, #92]
507 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #58]
508 ; NONEON-NOSVE-NEXT: add w8, w9, w8
509 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #40]
510 ; NONEON-NOSVE-NEXT: strh w8, [sp, #90]
511 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56]
512 ; NONEON-NOSVE-NEXT: add w8, w9, w8
513 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #38]
514 ; NONEON-NOSVE-NEXT: strh w8, [sp, #88]
515 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #54]
516 ; NONEON-NOSVE-NEXT: add w8, w9, w8
517 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #36]
518 ; NONEON-NOSVE-NEXT: strh w8, [sp, #86]
519 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #52]
520 ; NONEON-NOSVE-NEXT: add w8, w9, w8
521 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #34]
522 ; NONEON-NOSVE-NEXT: strh w8, [sp, #84]
523 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #50]
524 ; NONEON-NOSVE-NEXT: add w8, w9, w8
525 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #32]
526 ; NONEON-NOSVE-NEXT: strh w8, [sp, #82]
527 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #48]
528 ; NONEON-NOSVE-NEXT: add w8, w9, w8
529 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
530 ; NONEON-NOSVE-NEXT: strh w8, [sp, #80]
531 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
532 ; NONEON-NOSVE-NEXT: add w8, w9, w8
533 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #12]
534 ; NONEON-NOSVE-NEXT: strh w8, [sp, #78]
535 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #28]
536 ; NONEON-NOSVE-NEXT: add w8, w9, w8
537 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
538 ; NONEON-NOSVE-NEXT: strh w8, [sp, #76]
539 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
540 ; NONEON-NOSVE-NEXT: add w8, w9, w8
541 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8]
542 ; NONEON-NOSVE-NEXT: strh w8, [sp, #74]
543 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #24]
544 ; NONEON-NOSVE-NEXT: add w8, w9, w8
545 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6]
546 ; NONEON-NOSVE-NEXT: strh w8, [sp, #72]
547 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
548 ; NONEON-NOSVE-NEXT: add w8, w9, w8
549 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4]
550 ; NONEON-NOSVE-NEXT: strh w8, [sp, #70]
551 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20]
552 ; NONEON-NOSVE-NEXT: add w8, w9, w8
553 ; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2]
554 ; NONEON-NOSVE-NEXT: strh w8, [sp, #68]
555 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
556 ; NONEON-NOSVE-NEXT: add w8, w9, w8
557 ; NONEON-NOSVE-NEXT: ldrh w9, [sp]
558 ; NONEON-NOSVE-NEXT: strh w8, [sp, #66]
559 ; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16]
560 ; NONEON-NOSVE-NEXT: add w8, w9, w8
561 ; NONEON-NOSVE-NEXT: strh w8, [sp, #64]
562 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
563 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
564 ; NONEON-NOSVE-NEXT: add sp, sp, #96
565 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
566 %op1 = load <16 x i16>, ptr %a
567 %op2 = load <16 x i16>, ptr %b
568 %res = add <16 x i16> %op1, %op2
569 store <16 x i16> %res, ptr %a
; abs_v2i32: load <2 x i32> from %a, apply @llvm.abs.v2i32, store the result to %a.
; Default-prefix assertions expect a predicated SVE "abs z0.s" under ptrue vl2
; on a d-register load. The run without +sve/+sme expects the vector spilled to
; a 16-byte frame and each lane handled with ldr/cmp #0/cneg ... mi, the two
; results stored as a pair at sp+8 and copied back to [x0].
573 define void @abs_v2i32(ptr %a) {
574 ; CHECK-LABEL: abs_v2i32:
576 ; CHECK-NEXT: ptrue p0.s, vl2
577 ; CHECK-NEXT: ldr d0, [x0]
578 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
579 ; CHECK-NEXT: str d0, [x0]
582 ; NONEON-NOSVE-LABEL: abs_v2i32:
583 ; NONEON-NOSVE: // %bb.0:
584 ; NONEON-NOSVE-NEXT: ldr d0, [x0]
585 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
586 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
587 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
588 ; NONEON-NOSVE-NEXT: cmp w8, #0
589 ; NONEON-NOSVE-NEXT: cneg w9, w8, mi
590 ; NONEON-NOSVE-NEXT: ldr w8, [sp]
591 ; NONEON-NOSVE-NEXT: cmp w8, #0
592 ; NONEON-NOSVE-NEXT: cneg w8, w8, mi
593 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8]
594 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
595 ; NONEON-NOSVE-NEXT: str d0, [x0]
596 ; NONEON-NOSVE-NEXT: add sp, sp, #16
597 ; NONEON-NOSVE-NEXT: ret
; IR under test. The trailing "i1 false" operand is, per the LLVM LangRef, the
; intrinsic's is_int_min_poison flag (confirm against the LangRef entry).
598 %op1 = load <2 x i32>, ptr %a
599 %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false)
600 store <2 x i32> %res, ptr %a
; abs_v4i32: load <4 x i32> from %a, apply @llvm.abs.v4i32, store the result to %a.
; Default-prefix assertions expect a predicated SVE "abs z0.s" under ptrue vl4
; on a q-register load. The run without +sve/+sme expects the vector spilled to
; a 32-byte frame and each of the four lanes handled with ldr/cmp #0/cneg ... mi,
; results stored pairwise at sp+16..31 and copied back to [x0].
604 define void @abs_v4i32(ptr %a) {
605 ; CHECK-LABEL: abs_v4i32:
607 ; CHECK-NEXT: ptrue p0.s, vl4
608 ; CHECK-NEXT: ldr q0, [x0]
609 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
610 ; CHECK-NEXT: str q0, [x0]
613 ; NONEON-NOSVE-LABEL: abs_v4i32:
614 ; NONEON-NOSVE: // %bb.0:
615 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
616 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
617 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
618 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
619 ; NONEON-NOSVE-NEXT: cmp w8, #0
620 ; NONEON-NOSVE-NEXT: cneg w9, w8, mi
621 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
622 ; NONEON-NOSVE-NEXT: cmp w8, #0
623 ; NONEON-NOSVE-NEXT: cneg w8, w8, mi
624 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
625 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
626 ; NONEON-NOSVE-NEXT: cmp w8, #0
627 ; NONEON-NOSVE-NEXT: cneg w9, w8, mi
628 ; NONEON-NOSVE-NEXT: ldr w8, [sp]
629 ; NONEON-NOSVE-NEXT: cmp w8, #0
630 ; NONEON-NOSVE-NEXT: cneg w8, w8, mi
631 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
632 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
633 ; NONEON-NOSVE-NEXT: str q0, [x0]
634 ; NONEON-NOSVE-NEXT: add sp, sp, #32
635 ; NONEON-NOSVE-NEXT: ret
; IR under test. The trailing "i1 false" operand is, per the LLVM LangRef, the
; intrinsic's is_int_min_poison flag (confirm against the LangRef entry).
636 %op1 = load <4 x i32>, ptr %a
637 %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false)
638 store <4 x i32> %res, ptr %a
; abs_v8i32: load <8 x i32> from %a, apply @llvm.abs.v8i32, store the result to %a.
; Default-prefix assertions expect the 32-byte vector handled as two q-register
; halves, each with a predicated SVE "abs z.s" under a shared ptrue vl4. The
; run without +sve/+sme expects the input pushed with a pre-indexed stp into a
; 64-byte frame, eight ldr/cmp #0/cneg ... mi lane operations with results
; stored pairwise at sp+32..63, then an ldp/stp pair copying them to [x0].
642 define void @abs_v8i32(ptr %a) {
643 ; CHECK-LABEL: abs_v8i32:
645 ; CHECK-NEXT: ldp q0, q1, [x0]
646 ; CHECK-NEXT: ptrue p0.s, vl4
647 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
648 ; CHECK-NEXT: abs z1.s, p0/m, z1.s
649 ; CHECK-NEXT: stp q0, q1, [x0]
652 ; NONEON-NOSVE-LABEL: abs_v8i32:
653 ; NONEON-NOSVE: // %bb.0:
654 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
655 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
656 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
657 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #28]
658 ; NONEON-NOSVE-NEXT: cmp w8, #0
659 ; NONEON-NOSVE-NEXT: cneg w9, w8, mi
660 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
661 ; NONEON-NOSVE-NEXT: cmp w8, #0
662 ; NONEON-NOSVE-NEXT: cneg w8, w8, mi
663 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56]
664 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
665 ; NONEON-NOSVE-NEXT: cmp w8, #0
666 ; NONEON-NOSVE-NEXT: cneg w9, w8, mi
667 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
668 ; NONEON-NOSVE-NEXT: cmp w8, #0
669 ; NONEON-NOSVE-NEXT: cneg w8, w8, mi
670 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48]
671 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
672 ; NONEON-NOSVE-NEXT: cmp w8, #0
673 ; NONEON-NOSVE-NEXT: cneg w9, w8, mi
674 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
675 ; NONEON-NOSVE-NEXT: cmp w8, #0
676 ; NONEON-NOSVE-NEXT: cneg w8, w8, mi
677 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40]
678 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
679 ; NONEON-NOSVE-NEXT: cmp w8, #0
680 ; NONEON-NOSVE-NEXT: cneg w9, w8, mi
681 ; NONEON-NOSVE-NEXT: ldr w8, [sp]
682 ; NONEON-NOSVE-NEXT: cmp w8, #0
683 ; NONEON-NOSVE-NEXT: cneg w8, w8, mi
684 ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32]
685 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
686 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
687 ; NONEON-NOSVE-NEXT: add sp, sp, #64
688 ; NONEON-NOSVE-NEXT: ret
; IR under test. The trailing "i1 false" operand is, per the LLVM LangRef, the
; intrinsic's is_int_min_poison flag (confirm against the LangRef entry).
689 %op1 = load <8 x i32>, ptr %a
690 %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false)
691 store <8 x i32> %res, ptr %a
; abs_v2i64: load <2 x i64> from %a, apply @llvm.abs.v2i64, store the result to %a.
; Default-prefix assertions expect a predicated SVE "abs z0.d" under ptrue vl2
; on a q-register load. The run without +sve/+sme expects the vector spilled to
; a 32-byte frame and each 64-bit lane handled with ldr/cmp #0/cneg ... mi on
; x registers, the results stored as a pair at sp+16 and copied back to [x0].
695 define void @abs_v2i64(ptr %a) {
696 ; CHECK-LABEL: abs_v2i64:
698 ; CHECK-NEXT: ptrue p0.d, vl2
699 ; CHECK-NEXT: ldr q0, [x0]
700 ; CHECK-NEXT: abs z0.d, p0/m, z0.d
701 ; CHECK-NEXT: str q0, [x0]
704 ; NONEON-NOSVE-LABEL: abs_v2i64:
705 ; NONEON-NOSVE: // %bb.0:
706 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
707 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
708 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
709 ; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
710 ; NONEON-NOSVE-NEXT: cmp x8, #0
711 ; NONEON-NOSVE-NEXT: cneg x9, x8, mi
712 ; NONEON-NOSVE-NEXT: ldr x8, [sp]
713 ; NONEON-NOSVE-NEXT: cmp x8, #0
714 ; NONEON-NOSVE-NEXT: cneg x8, x8, mi
715 ; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #16]
716 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
717 ; NONEON-NOSVE-NEXT: str q0, [x0]
718 ; NONEON-NOSVE-NEXT: add sp, sp, #32
719 ; NONEON-NOSVE-NEXT: ret
; IR under test. The trailing "i1 false" operand is, per the LLVM LangRef, the
; intrinsic's is_int_min_poison flag (confirm against the LangRef entry).
720 %op1 = load <2 x i64>, ptr %a
721 %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false)
722 store <2 x i64> %res, ptr %a
; abs_v4i64: load <4 x i64> from %a, apply @llvm.abs.v4i64, store the result to %a.
; Default-prefix assertions expect the 32-byte vector handled as two q-register
; halves, each with a predicated SVE "abs z.d" under a shared ptrue vl2. The
; run without +sve/+sme expects the input pushed with a pre-indexed stp into a
; 64-byte frame, four ldr/cmp #0/cneg ... mi lane operations on x registers
; with results stored pairwise at sp+32..63, then an ldp/stp pair copying them
; to [x0].
726 define void @abs_v4i64(ptr %a) {
727 ; CHECK-LABEL: abs_v4i64:
729 ; CHECK-NEXT: ldp q0, q1, [x0]
730 ; CHECK-NEXT: ptrue p0.d, vl2
731 ; CHECK-NEXT: abs z0.d, p0/m, z0.d
732 ; CHECK-NEXT: abs z1.d, p0/m, z1.d
733 ; CHECK-NEXT: stp q0, q1, [x0]
736 ; NONEON-NOSVE-LABEL: abs_v4i64:
737 ; NONEON-NOSVE: // %bb.0:
738 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
739 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
740 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
741 ; NONEON-NOSVE-NEXT: ldr x8, [sp, #24]
742 ; NONEON-NOSVE-NEXT: cmp x8, #0
743 ; NONEON-NOSVE-NEXT: cneg x9, x8, mi
744 ; NONEON-NOSVE-NEXT: ldr x8, [sp, #16]
745 ; NONEON-NOSVE-NEXT: cmp x8, #0
746 ; NONEON-NOSVE-NEXT: cneg x8, x8, mi
747 ; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #48]
748 ; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
749 ; NONEON-NOSVE-NEXT: cmp x8, #0
750 ; NONEON-NOSVE-NEXT: cneg x9, x8, mi
751 ; NONEON-NOSVE-NEXT: ldr x8, [sp]
752 ; NONEON-NOSVE-NEXT: cmp x8, #0
753 ; NONEON-NOSVE-NEXT: cneg x8, x8, mi
754 ; NONEON-NOSVE-NEXT: stp x8, x9, [sp, #32]
755 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
756 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
757 ; NONEON-NOSVE-NEXT: add sp, sp, #64
758 ; NONEON-NOSVE-NEXT: ret
; IR under test. The trailing "i1 false" operand is, per the LLVM LangRef, the
; intrinsic's is_int_min_poison flag (confirm against the LangRef entry).
759 %op1 = load <4 x i64>, ptr %a
760 %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false)
761 store <4 x i64> %res, ptr %a
; fadd_v2f16: load <2 x half> from %a and %b, add element-wise, store the sum to %a.
; Default-prefix assertions expect the 32-bit operands loaded as s registers, a
; predicated SVE "fadd z0.h" under ptrue vl4, and the 32-bit result moved to w8
; (fmov) before the store. The run without +sve/+sme expects the operands
; copied through a 48-byte stack frame; each of the two lanes is converted
; half->single (fcvt s, h), added with a scalar fadd, converted back
; (fcvt h, s), and the packed 32-bit result is stored via w8.
765 define void @fadd_v2f16(ptr %a, ptr %b) {
766 ; CHECK-LABEL: fadd_v2f16:
768 ; CHECK-NEXT: ptrue p0.h, vl4
769 ; CHECK-NEXT: ldr s0, [x0]
770 ; CHECK-NEXT: ldr s1, [x1]
771 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
772 ; CHECK-NEXT: fmov w8, s0
773 ; CHECK-NEXT: str w8, [x0]
776 ; NONEON-NOSVE-LABEL: fadd_v2f16:
777 ; NONEON-NOSVE: // %bb.0:
778 ; NONEON-NOSVE-NEXT: ldr w8, [x0]
779 ; NONEON-NOSVE-NEXT: str w8, [sp, #-48]!
780 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
781 ; NONEON-NOSVE-NEXT: ldr w8, [x1]
782 ; NONEON-NOSVE-NEXT: str w8, [sp, #8]
783 ; NONEON-NOSVE-NEXT: ldp d0, d1, [sp]
784 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
785 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
786 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #18]
787 ; NONEON-NOSVE-NEXT: fcvt s0, h0
788 ; NONEON-NOSVE-NEXT: fcvt s1, h1
789 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
790 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #16]
791 ; NONEON-NOSVE-NEXT: fcvt s1, h1
792 ; NONEON-NOSVE-NEXT: fcvt h0, s0
793 ; NONEON-NOSVE-NEXT: str h0, [sp, #34]
794 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
795 ; NONEON-NOSVE-NEXT: fcvt s0, h0
796 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
797 ; NONEON-NOSVE-NEXT: fcvt h0, s0
798 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
799 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
800 ; NONEON-NOSVE-NEXT: str d0, [sp, #40]
801 ; NONEON-NOSVE-NEXT: ldr w8, [sp, #40]
802 ; NONEON-NOSVE-NEXT: str w8, [x0]
803 ; NONEON-NOSVE-NEXT: add sp, sp, #48
804 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
805 %op1 = load <2 x half>, ptr %a
806 %op2 = load <2 x half>, ptr %b
807 %res = fadd <2 x half> %op1, %op2
808 store <2 x half> %res, ptr %a
; fadd_v4f16: load <4 x half> from %a and %b, add element-wise, store the sum to %a.
; Default-prefix assertions expect both operands loaded as d registers and a
; predicated SVE "fadd z0.h" under ptrue vl4. The run without +sve/+sme expects
; both operands spilled to a 32-byte frame (%a at sp+8, %b at sp+16); each of
; the four lanes is converted half->single (fcvt s, h), added with a scalar
; fadd, converted back (fcvt h, s) and stored at sp+24..31, then the result is
; reloaded as d0 and stored to [x0].
812 define void @fadd_v4f16(ptr %a, ptr %b) {
813 ; CHECK-LABEL: fadd_v4f16:
815 ; CHECK-NEXT: ptrue p0.h, vl4
816 ; CHECK-NEXT: ldr d0, [x0]
817 ; CHECK-NEXT: ldr d1, [x1]
818 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
819 ; CHECK-NEXT: str d0, [x0]
822 ; NONEON-NOSVE-LABEL: fadd_v4f16:
823 ; NONEON-NOSVE: // %bb.0:
824 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
825 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
826 ; NONEON-NOSVE-NEXT: ldr d0, [x1]
827 ; NONEON-NOSVE-NEXT: ldr d1, [x0]
828 ; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #8]
829 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
830 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
831 ; NONEON-NOSVE-NEXT: fcvt s0, h0
832 ; NONEON-NOSVE-NEXT: fcvt s1, h1
833 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
834 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
835 ; NONEON-NOSVE-NEXT: fcvt s1, h1
836 ; NONEON-NOSVE-NEXT: fcvt h0, s0
837 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
838 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
839 ; NONEON-NOSVE-NEXT: fcvt s0, h0
840 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
841 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
842 ; NONEON-NOSVE-NEXT: fcvt s1, h1
843 ; NONEON-NOSVE-NEXT: fcvt h0, s0
844 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
845 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
846 ; NONEON-NOSVE-NEXT: fcvt s0, h0
847 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
848 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
849 ; NONEON-NOSVE-NEXT: fcvt s1, h1
850 ; NONEON-NOSVE-NEXT: fcvt h0, s0
851 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
852 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
853 ; NONEON-NOSVE-NEXT: fcvt s0, h0
854 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
855 ; NONEON-NOSVE-NEXT: fcvt h0, s0
856 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
857 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
858 ; NONEON-NOSVE-NEXT: str d0, [x0]
859 ; NONEON-NOSVE-NEXT: add sp, sp, #32
860 ; NONEON-NOSVE-NEXT: ret
; IR under test: the result overwrites the first operand's memory (%a).
861 %op1 = load <4 x half>, ptr %a
862 %op2 = load <4 x half>, ptr %b
863 %res = fadd <4 x half> %op1, %op2
864 store <4 x half> %res, ptr %a
; fadd of a 128-bit <8 x half> vector: both operands are loaded through %a and
; %b, added, and the sum is stored back through %a.
; Expected code for the SVE/SME runs: q-register load/store around a single
; predicated fadd on .h lanes, governed by a vl8 predicate.
; Expected code for the run without NEON and SVE: both operands are written to
; a 48-byte stack frame and each of the 8 lanes is processed as a scalar
; (fcvt half->single, fadd, fcvt single->half); the result vector is then
; reloaded from the stack as one q register and stored.
; NOTE: the assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
868 define void @fadd_v8f16(ptr %a, ptr %b) {
869 ; CHECK-LABEL: fadd_v8f16:
871 ; CHECK-NEXT: ptrue p0.h, vl8
872 ; CHECK-NEXT: ldr q0, [x0]
873 ; CHECK-NEXT: ldr q1, [x1]
874 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
875 ; CHECK-NEXT: str q0, [x0]
878 ; NONEON-NOSVE-LABEL: fadd_v8f16:
879 ; NONEON-NOSVE: // %bb.0:
880 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
881 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
882 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
883 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
884 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
885 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
886 ; NONEON-NOSVE-NEXT: fcvt s0, h0
887 ; NONEON-NOSVE-NEXT: fcvt s1, h1
888 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
889 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
890 ; NONEON-NOSVE-NEXT: fcvt s1, h1
891 ; NONEON-NOSVE-NEXT: fcvt h0, s0
892 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
893 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
894 ; NONEON-NOSVE-NEXT: fcvt s0, h0
895 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
896 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
897 ; NONEON-NOSVE-NEXT: fcvt s1, h1
898 ; NONEON-NOSVE-NEXT: fcvt h0, s0
899 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
900 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
901 ; NONEON-NOSVE-NEXT: fcvt s0, h0
902 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
903 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
904 ; NONEON-NOSVE-NEXT: fcvt s1, h1
905 ; NONEON-NOSVE-NEXT: fcvt h0, s0
906 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
907 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
908 ; NONEON-NOSVE-NEXT: fcvt s0, h0
909 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
910 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
911 ; NONEON-NOSVE-NEXT: fcvt s1, h1
912 ; NONEON-NOSVE-NEXT: fcvt h0, s0
913 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
914 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
915 ; NONEON-NOSVE-NEXT: fcvt s0, h0
916 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
917 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
918 ; NONEON-NOSVE-NEXT: fcvt s1, h1
919 ; NONEON-NOSVE-NEXT: fcvt h0, s0
920 ; NONEON-NOSVE-NEXT: str h0, [sp, #38]
921 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
922 ; NONEON-NOSVE-NEXT: fcvt s0, h0
923 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
924 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
925 ; NONEON-NOSVE-NEXT: fcvt s1, h1
926 ; NONEON-NOSVE-NEXT: fcvt h0, s0
927 ; NONEON-NOSVE-NEXT: str h0, [sp, #36]
928 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
929 ; NONEON-NOSVE-NEXT: fcvt s0, h0
930 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
931 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
932 ; NONEON-NOSVE-NEXT: fcvt s1, h1
933 ; NONEON-NOSVE-NEXT: fcvt h0, s0
934 ; NONEON-NOSVE-NEXT: str h0, [sp, #34]
935 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
936 ; NONEON-NOSVE-NEXT: fcvt s0, h0
937 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
938 ; NONEON-NOSVE-NEXT: fcvt h0, s0
939 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
940 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
941 ; NONEON-NOSVE-NEXT: str q0, [x0]
942 ; NONEON-NOSVE-NEXT: add sp, sp, #48
943 ; NONEON-NOSVE-NEXT: ret
944 %op1 = load <8 x half>, ptr %a
945 %op2 = load <8 x half>, ptr %b
946 %res = fadd <8 x half> %op1, %op2
947 store <8 x half> %res, ptr %a
; fadd of a 256-bit <16 x half> vector: both operands are loaded through %a and
; %b, added, and the sum is stored back through %a.
; Expected code for the SVE/SME runs: the vector is handled as two q-register
; halves (ldp/stp) with two predicated fadd instructions on .h lanes sharing
; one vl8 predicate; the second fadd is preceded by a movprfx.
; Expected code for the run without NEON and SVE: all four q registers are
; written to a 96-byte stack frame and each of the 16 lanes is processed as a
; scalar (fcvt half->single, fadd, fcvt single->half); the two result halves
; are then reloaded from the stack with ldp and stored with stp.
; NOTE: the assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
951 define void @fadd_v16f16(ptr %a, ptr %b) {
952 ; CHECK-LABEL: fadd_v16f16:
954 ; CHECK-NEXT: ldp q0, q3, [x1]
955 ; CHECK-NEXT: ptrue p0.h, vl8
956 ; CHECK-NEXT: ldp q1, q2, [x0]
957 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
958 ; CHECK-NEXT: movprfx z1, z2
959 ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h
960 ; CHECK-NEXT: stp q0, q1, [x0]
963 ; NONEON-NOSVE-LABEL: fadd_v16f16:
964 ; NONEON-NOSVE: // %bb.0:
965 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
966 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
967 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
968 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
969 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
970 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
971 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
972 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #46]
973 ; NONEON-NOSVE-NEXT: fcvt s0, h0
974 ; NONEON-NOSVE-NEXT: fcvt s1, h1
975 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
976 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #44]
977 ; NONEON-NOSVE-NEXT: fcvt s1, h1
978 ; NONEON-NOSVE-NEXT: fcvt h0, s0
979 ; NONEON-NOSVE-NEXT: str h0, [sp, #94]
980 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
981 ; NONEON-NOSVE-NEXT: fcvt s0, h0
982 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
983 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #42]
984 ; NONEON-NOSVE-NEXT: fcvt s1, h1
985 ; NONEON-NOSVE-NEXT: fcvt h0, s0
986 ; NONEON-NOSVE-NEXT: str h0, [sp, #92]
987 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
988 ; NONEON-NOSVE-NEXT: fcvt s0, h0
989 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
990 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #40]
991 ; NONEON-NOSVE-NEXT: fcvt s1, h1
992 ; NONEON-NOSVE-NEXT: fcvt h0, s0
993 ; NONEON-NOSVE-NEXT: str h0, [sp, #90]
994 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
995 ; NONEON-NOSVE-NEXT: fcvt s0, h0
996 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
997 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #38]
998 ; NONEON-NOSVE-NEXT: fcvt s1, h1
999 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1000 ; NONEON-NOSVE-NEXT: str h0, [sp, #88]
1001 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
1002 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1003 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1004 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #36]
1005 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1006 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1007 ; NONEON-NOSVE-NEXT: str h0, [sp, #86]
1008 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
1009 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1010 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1011 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #34]
1012 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1013 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1014 ; NONEON-NOSVE-NEXT: str h0, [sp, #84]
1015 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
1016 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1017 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1018 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
1019 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1020 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1021 ; NONEON-NOSVE-NEXT: str h0, [sp, #82]
1022 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
1023 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1024 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1025 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1026 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1027 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1028 ; NONEON-NOSVE-NEXT: str h0, [sp, #80]
1029 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
1030 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1031 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1032 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1033 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1034 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1035 ; NONEON-NOSVE-NEXT: str h0, [sp, #78]
1036 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
1037 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1038 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1039 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1040 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1041 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1042 ; NONEON-NOSVE-NEXT: str h0, [sp, #76]
1043 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
1044 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1045 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1046 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1047 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1048 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1049 ; NONEON-NOSVE-NEXT: str h0, [sp, #74]
1050 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
1051 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1052 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1053 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
1054 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1055 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1056 ; NONEON-NOSVE-NEXT: str h0, [sp, #72]
1057 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
1058 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1059 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1060 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
1061 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1062 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1063 ; NONEON-NOSVE-NEXT: str h0, [sp, #70]
1064 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
1065 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1066 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1067 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
1068 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1069 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1070 ; NONEON-NOSVE-NEXT: str h0, [sp, #68]
1071 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1072 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1073 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1074 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
1075 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1076 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1077 ; NONEON-NOSVE-NEXT: str h0, [sp, #66]
1078 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
1079 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1080 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1081 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1082 ; NONEON-NOSVE-NEXT: str h0, [sp, #64]
1083 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
1084 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
1085 ; NONEON-NOSVE-NEXT: add sp, sp, #96
1086 ; NONEON-NOSVE-NEXT: ret
1087 %op1 = load <16 x half>, ptr %a
1088 %op2 = load <16 x half>, ptr %b
1089 %res = fadd <16 x half> %op1, %op2
1090 store <16 x half> %res, ptr %a
; fadd of a 64-bit <2 x float> vector: both operands are loaded through %a and
; %b, added, and the sum is stored back through %a.
; Expected code for the SVE/SME runs: d-register load/store around a single
; predicated fadd on .s lanes, governed by a vl2 predicate.
; Expected code for the run without NEON and SVE: both operands are written to
; a 32-byte stack frame, the two lanes are added with scalar single-precision
; fadd (no conversions needed), and the result is reloaded as one d register.
; NOTE: the assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
1094 define void @fadd_v2f32(ptr %a, ptr %b) {
1095 ; CHECK-LABEL: fadd_v2f32:
1097 ; CHECK-NEXT: ptrue p0.s, vl2
1098 ; CHECK-NEXT: ldr d0, [x0]
1099 ; CHECK-NEXT: ldr d1, [x1]
1100 ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
1101 ; CHECK-NEXT: str d0, [x0]
1104 ; NONEON-NOSVE-LABEL: fadd_v2f32:
1105 ; NONEON-NOSVE: // %bb.0:
1106 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
1107 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1108 ; NONEON-NOSVE-NEXT: ldr d0, [x1]
1109 ; NONEON-NOSVE-NEXT: ldr d1, [x0]
1110 ; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #8]
1111 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
1112 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
1113 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
1114 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
1115 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1116 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
1117 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1118 ; NONEON-NOSVE-NEXT: str d0, [x0]
1119 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1120 ; NONEON-NOSVE-NEXT: ret
1121 %op1 = load <2 x float>, ptr %a
1122 %op2 = load <2 x float>, ptr %b
1123 %res = fadd <2 x float> %op1, %op2
1124 store <2 x float> %res, ptr %a
; fadd of a 128-bit <4 x float> vector: both operands are loaded through %a and
; %b, added, and the sum is stored back through %a.
; Expected code for the SVE/SME runs: q-register load/store around a single
; predicated fadd on .s lanes, governed by a vl4 predicate.
; Expected code for the run without NEON and SVE: both operands are written to
; a 48-byte stack frame, the four lanes are added pairwise with scalar
; single-precision fadd, and the result is reloaded as one q register.
; NOTE: the assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
1128 define void @fadd_v4f32(ptr %a, ptr %b) {
1129 ; CHECK-LABEL: fadd_v4f32:
1131 ; CHECK-NEXT: ptrue p0.s, vl4
1132 ; CHECK-NEXT: ldr q0, [x0]
1133 ; CHECK-NEXT: ldr q1, [x1]
1134 ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
1135 ; CHECK-NEXT: str q0, [x0]
1138 ; NONEON-NOSVE-LABEL: fadd_v4f32:
1139 ; NONEON-NOSVE: // %bb.0:
1140 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
1141 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
1142 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
1143 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
1144 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
1145 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
1146 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
1147 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
1148 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1149 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
1150 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40]
1151 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
1152 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
1153 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
1154 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1155 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32]
1156 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
1157 ; NONEON-NOSVE-NEXT: str q0, [x0]
1158 ; NONEON-NOSVE-NEXT: add sp, sp, #48
1159 ; NONEON-NOSVE-NEXT: ret
1160 %op1 = load <4 x float>, ptr %a
1161 %op2 = load <4 x float>, ptr %b
1162 %res = fadd <4 x float> %op1, %op2
1163 store <4 x float> %res, ptr %a
; fadd of a 256-bit <8 x float> vector: both operands are loaded through %a and
; %b, added, and the sum is stored back through %a.
; Expected code for the SVE/SME runs: the vector is handled as two q-register
; halves (ldp/stp) with two predicated fadd instructions on .s lanes sharing
; one vl4 predicate; the second fadd is preceded by a movprfx.
; Expected code for the run without NEON and SVE: all four q registers are
; written to a 96-byte stack frame, the eight lanes are added with scalar
; single-precision fadd, and the two result halves are reloaded with ldp.
; NOTE: the assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
1167 define void @fadd_v8f32(ptr %a, ptr %b) {
1168 ; CHECK-LABEL: fadd_v8f32:
1170 ; CHECK-NEXT: ldp q0, q3, [x1]
1171 ; CHECK-NEXT: ptrue p0.s, vl4
1172 ; CHECK-NEXT: ldp q1, q2, [x0]
1173 ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
1174 ; CHECK-NEXT: movprfx z1, z2
1175 ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s
1176 ; CHECK-NEXT: stp q0, q1, [x0]
1179 ; NONEON-NOSVE-LABEL: fadd_v8f32:
1180 ; NONEON-NOSVE: // %bb.0:
1181 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
1182 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
1183 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
1184 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
1185 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
1186 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
1187 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40]
1188 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #60]
1189 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
1190 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #56]
1191 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1192 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32]
1193 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88]
1194 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #52]
1195 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
1196 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
1197 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1198 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
1199 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80]
1200 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
1201 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
1202 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
1203 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1204 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
1205 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72]
1206 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
1207 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
1208 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
1209 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
1210 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64]
1211 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
1212 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
1213 ; NONEON-NOSVE-NEXT: add sp, sp, #96
1214 ; NONEON-NOSVE-NEXT: ret
1215 %op1 = load <8 x float>, ptr %a
1216 %op2 = load <8 x float>, ptr %b
1217 %res = fadd <8 x float> %op1, %op2
1218 store <8 x float> %res, ptr %a
; fadd of a 128-bit <2 x double> vector: both operands are loaded through %a
; and %b, added, and the sum is stored back through %a.
; Expected code for the SVE/SME runs: q-register load/store around a single
; predicated fadd on .d lanes, governed by a vl2 predicate.
; Expected code for the run without NEON and SVE: both operands are written to
; a 48-byte stack frame, the two lanes are added with scalar double-precision
; fadd, and the result is reloaded as one q register.
; NOTE: the assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
1222 define void @fadd_v2f64(ptr %a, ptr %b) {
1223 ; CHECK-LABEL: fadd_v2f64:
1225 ; CHECK-NEXT: ptrue p0.d, vl2
1226 ; CHECK-NEXT: ldr q0, [x0]
1227 ; CHECK-NEXT: ldr q1, [x1]
1228 ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
1229 ; CHECK-NEXT: str q0, [x0]
1232 ; NONEON-NOSVE-LABEL: fadd_v2f64:
1233 ; NONEON-NOSVE: // %bb.0:
1234 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
1235 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
1236 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
1237 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
1238 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
1239 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1240 ; NONEON-NOSVE-NEXT: fadd d3, d2, d0
1241 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
1242 ; NONEON-NOSVE-NEXT: fadd d0, d1, d0
1243 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32]
1244 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
1245 ; NONEON-NOSVE-NEXT: str q0, [x0]
1246 ; NONEON-NOSVE-NEXT: add sp, sp, #48
1247 ; NONEON-NOSVE-NEXT: ret
1248 %op1 = load <2 x double>, ptr %a
1249 %op2 = load <2 x double>, ptr %b
1250 %res = fadd <2 x double> %op1, %op2
1251 store <2 x double> %res, ptr %a
; fadd of a 256-bit <4 x double> vector: both operands are loaded through %a
; and %b, added, and the sum is stored back through %a.
; Expected code for the SVE/SME runs: the vector is handled as two q-register
; halves (ldp/stp) with two predicated fadd instructions on .d lanes sharing
; one vl2 predicate; the second fadd is preceded by a movprfx.
; Expected code for the run without NEON and SVE: all four q registers are
; written to a 96-byte stack frame, the four lanes are added with scalar
; double-precision fadd, and the two result halves are reloaded with ldp.
; NOTE: the assertion lines below are autogenerated (see the file header);
; regenerate them with the update script rather than editing by hand.
1255 define void @fadd_v4f64(ptr %a, ptr %b) {
1256 ; CHECK-LABEL: fadd_v4f64:
1258 ; CHECK-NEXT: ldp q0, q3, [x1]
1259 ; CHECK-NEXT: ptrue p0.d, vl2
1260 ; CHECK-NEXT: ldp q1, q2, [x0]
1261 ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
1262 ; CHECK-NEXT: movprfx z1, z2
1263 ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d
1264 ; CHECK-NEXT: stp q0, q1, [x0]
1267 ; NONEON-NOSVE-LABEL: fadd_v4f64:
1268 ; NONEON-NOSVE: // %bb.0:
1269 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
1270 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
1271 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
1272 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
1273 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
1274 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
1275 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32]
1276 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
1277 ; NONEON-NOSVE-NEXT: fadd d3, d2, d0
1278 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
1279 ; NONEON-NOSVE-NEXT: fadd d0, d1, d0
1280 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
1281 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80]
1282 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1283 ; NONEON-NOSVE-NEXT: fadd d3, d2, d0
1284 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
1285 ; NONEON-NOSVE-NEXT: fadd d0, d1, d0
1286 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64]
1287 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
1288 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
1289 ; NONEON-NOSVE-NEXT: add sp, sp, #96
1290 ; NONEON-NOSVE-NEXT: ret
1291 %op1 = load <4 x double>, ptr %a
1292 %op2 = load <4 x double>, ptr %b
1293 %res = fadd <4 x double> %op1, %op2
1294 store <4 x double> %res, ptr %a
; Declarations of the llvm.abs intrinsic for the i32 and i64 fixed-length
; vector types listed below. Each takes the vector operand plus an i1 flag.
; NOTE(review): the functions calling these are outside this part of the
; file, and further declarations may follow - confirm before pruning any.
1298 declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
1299 declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
1300 declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
1301 declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
1302 declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)