1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
4 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
6 target triple = "aarch64-unknown-linux-gnu"
; fadd of two <2 x half> register arguments.
; CHECK prefix (the +sve and +sme RUN lines): one predicated "fadd z0.h" under "ptrue p0.h, vl4".
; NONEON-NOSVE prefix: both operands are stored to a 32-byte stack frame and four half lanes are
; handled one at a time (ldr h, fcvt to s, scalar fadd, fcvt back to h, str h); the result is reloaded into d0.
12 define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) {
13 ; CHECK-LABEL: fadd_v2f16:
15 ; CHECK-NEXT: ptrue p0.h, vl4
16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
17 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
18 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
19 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
22 ; NONEON-NOSVE-LABEL: fadd_v2f16:
23 ; NONEON-NOSVE: // %bb.0:
24 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
25 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
26 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
27 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
28 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
29 ; NONEON-NOSVE-NEXT: fcvt s0, h0
30 ; NONEON-NOSVE-NEXT: fcvt s1, h1
31 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
32 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
33 ; NONEON-NOSVE-NEXT: fcvt s1, h1
34 ; NONEON-NOSVE-NEXT: fcvt h0, s0
35 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
36 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
37 ; NONEON-NOSVE-NEXT: fcvt s0, h0
38 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
39 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
40 ; NONEON-NOSVE-NEXT: fcvt s1, h1
41 ; NONEON-NOSVE-NEXT: fcvt h0, s0
42 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
43 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
44 ; NONEON-NOSVE-NEXT: fcvt s0, h0
45 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
46 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
47 ; NONEON-NOSVE-NEXT: fcvt s1, h1
48 ; NONEON-NOSVE-NEXT: fcvt h0, s0
49 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
50 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
51 ; NONEON-NOSVE-NEXT: fcvt s0, h0
52 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
53 ; NONEON-NOSVE-NEXT: fcvt h0, s0
54 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
55 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
56 ; NONEON-NOSVE-NEXT: add sp, sp, #32
57 ; NONEON-NOSVE-NEXT: ret
58 %res = fadd <2 x half> %op1, %op2
; fadd of two <4 x half> register arguments.
; CHECK prefix: one predicated "fadd z0.h" under "ptrue p0.h, vl4".
; NONEON-NOSVE prefix: operands are stored to a 32-byte stack frame and each of the four half lanes is
; converted to single precision, added with a scalar fadd, converted back and stored; result reloaded into d0.
62 define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) {
63 ; CHECK-LABEL: fadd_v4f16:
65 ; CHECK-NEXT: ptrue p0.h, vl4
66 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
67 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
68 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
69 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
72 ; NONEON-NOSVE-LABEL: fadd_v4f16:
73 ; NONEON-NOSVE: // %bb.0:
74 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
75 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
76 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
77 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
78 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
79 ; NONEON-NOSVE-NEXT: fcvt s0, h0
80 ; NONEON-NOSVE-NEXT: fcvt s1, h1
81 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
82 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
83 ; NONEON-NOSVE-NEXT: fcvt s1, h1
84 ; NONEON-NOSVE-NEXT: fcvt h0, s0
85 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
86 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
87 ; NONEON-NOSVE-NEXT: fcvt s0, h0
88 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
89 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
90 ; NONEON-NOSVE-NEXT: fcvt s1, h1
91 ; NONEON-NOSVE-NEXT: fcvt h0, s0
92 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
93 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
94 ; NONEON-NOSVE-NEXT: fcvt s0, h0
95 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
96 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
97 ; NONEON-NOSVE-NEXT: fcvt s1, h1
98 ; NONEON-NOSVE-NEXT: fcvt h0, s0
99 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
100 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
101 ; NONEON-NOSVE-NEXT: fcvt s0, h0
102 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
103 ; NONEON-NOSVE-NEXT: fcvt h0, s0
104 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
105 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
106 ; NONEON-NOSVE-NEXT: add sp, sp, #32
107 ; NONEON-NOSVE-NEXT: ret
108 %res = fadd <4 x half> %op1, %op2
; fadd of two <8 x half> register arguments.
; CHECK prefix: one predicated "fadd z0.h" under "ptrue p0.h, vl8".
; NONEON-NOSVE prefix: q0/q1 are pushed with a pre-indexed stp (48-byte frame) and the eight half lanes
; are processed one at a time through single precision; the result is reloaded into q0 from [sp, #32].
112 define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) {
113 ; CHECK-LABEL: fadd_v8f16:
115 ; CHECK-NEXT: ptrue p0.h, vl8
116 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
117 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
118 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
119 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
122 ; NONEON-NOSVE-LABEL: fadd_v8f16:
123 ; NONEON-NOSVE: // %bb.0:
124 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
125 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
126 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
127 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
128 ; NONEON-NOSVE-NEXT: fcvt s0, h0
129 ; NONEON-NOSVE-NEXT: fcvt s1, h1
130 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
131 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
132 ; NONEON-NOSVE-NEXT: fcvt s1, h1
133 ; NONEON-NOSVE-NEXT: fcvt h0, s0
134 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
135 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
136 ; NONEON-NOSVE-NEXT: fcvt s0, h0
137 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
138 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
139 ; NONEON-NOSVE-NEXT: fcvt s1, h1
140 ; NONEON-NOSVE-NEXT: fcvt h0, s0
141 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
142 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
143 ; NONEON-NOSVE-NEXT: fcvt s0, h0
144 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
145 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
146 ; NONEON-NOSVE-NEXT: fcvt s1, h1
147 ; NONEON-NOSVE-NEXT: fcvt h0, s0
148 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
149 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
150 ; NONEON-NOSVE-NEXT: fcvt s0, h0
151 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
152 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
153 ; NONEON-NOSVE-NEXT: fcvt s1, h1
154 ; NONEON-NOSVE-NEXT: fcvt h0, s0
155 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
156 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
157 ; NONEON-NOSVE-NEXT: fcvt s0, h0
158 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
159 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
160 ; NONEON-NOSVE-NEXT: fcvt s1, h1
161 ; NONEON-NOSVE-NEXT: fcvt h0, s0
162 ; NONEON-NOSVE-NEXT: str h0, [sp, #38]
163 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
164 ; NONEON-NOSVE-NEXT: fcvt s0, h0
165 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
166 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
167 ; NONEON-NOSVE-NEXT: fcvt s1, h1
168 ; NONEON-NOSVE-NEXT: fcvt h0, s0
169 ; NONEON-NOSVE-NEXT: str h0, [sp, #36]
170 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
171 ; NONEON-NOSVE-NEXT: fcvt s0, h0
172 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
173 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
174 ; NONEON-NOSVE-NEXT: fcvt s1, h1
175 ; NONEON-NOSVE-NEXT: fcvt h0, s0
176 ; NONEON-NOSVE-NEXT: str h0, [sp, #34]
177 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
178 ; NONEON-NOSVE-NEXT: fcvt s0, h0
179 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
180 ; NONEON-NOSVE-NEXT: fcvt h0, s0
181 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
182 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
183 ; NONEON-NOSVE-NEXT: add sp, sp, #48
184 ; NONEON-NOSVE-NEXT: ret
185 %res = fadd <8 x half> %op1, %op2
; Loads <16 x half> from %a and %b, adds them, and stores the result back to %a.
; CHECK prefix: the data is handled as two q-register halves, each with a predicated "fadd z.h" under
; "ptrue p0.h, vl8" (the second one preceded by movprfx), then stored with a single stp.
; NONEON-NOSVE prefix: both inputs are copied into a 96-byte stack frame and the sixteen half lanes are
; added one at a time through single precision; the result is reloaded from [sp, #64] and stored to [x0].
189 define void @fadd_v16f16(ptr %a, ptr %b) {
190 ; CHECK-LABEL: fadd_v16f16:
192 ; CHECK-NEXT: ldp q0, q3, [x1]
193 ; CHECK-NEXT: ptrue p0.h, vl8
194 ; CHECK-NEXT: ldp q1, q2, [x0]
195 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
196 ; CHECK-NEXT: movprfx z1, z2
197 ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h
198 ; CHECK-NEXT: stp q0, q1, [x0]
201 ; NONEON-NOSVE-LABEL: fadd_v16f16:
202 ; NONEON-NOSVE: // %bb.0:
203 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
204 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
205 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
206 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
207 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
208 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
209 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
210 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #46]
211 ; NONEON-NOSVE-NEXT: fcvt s0, h0
212 ; NONEON-NOSVE-NEXT: fcvt s1, h1
213 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
214 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #44]
215 ; NONEON-NOSVE-NEXT: fcvt s1, h1
216 ; NONEON-NOSVE-NEXT: fcvt h0, s0
217 ; NONEON-NOSVE-NEXT: str h0, [sp, #94]
218 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
219 ; NONEON-NOSVE-NEXT: fcvt s0, h0
220 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
221 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #42]
222 ; NONEON-NOSVE-NEXT: fcvt s1, h1
223 ; NONEON-NOSVE-NEXT: fcvt h0, s0
224 ; NONEON-NOSVE-NEXT: str h0, [sp, #92]
225 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
226 ; NONEON-NOSVE-NEXT: fcvt s0, h0
227 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
228 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #40]
229 ; NONEON-NOSVE-NEXT: fcvt s1, h1
230 ; NONEON-NOSVE-NEXT: fcvt h0, s0
231 ; NONEON-NOSVE-NEXT: str h0, [sp, #90]
232 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
233 ; NONEON-NOSVE-NEXT: fcvt s0, h0
234 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
235 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #38]
236 ; NONEON-NOSVE-NEXT: fcvt s1, h1
237 ; NONEON-NOSVE-NEXT: fcvt h0, s0
238 ; NONEON-NOSVE-NEXT: str h0, [sp, #88]
239 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
240 ; NONEON-NOSVE-NEXT: fcvt s0, h0
241 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
242 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #36]
243 ; NONEON-NOSVE-NEXT: fcvt s1, h1
244 ; NONEON-NOSVE-NEXT: fcvt h0, s0
245 ; NONEON-NOSVE-NEXT: str h0, [sp, #86]
246 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
247 ; NONEON-NOSVE-NEXT: fcvt s0, h0
248 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
249 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #34]
250 ; NONEON-NOSVE-NEXT: fcvt s1, h1
251 ; NONEON-NOSVE-NEXT: fcvt h0, s0
252 ; NONEON-NOSVE-NEXT: str h0, [sp, #84]
253 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
254 ; NONEON-NOSVE-NEXT: fcvt s0, h0
255 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
256 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
257 ; NONEON-NOSVE-NEXT: fcvt s1, h1
258 ; NONEON-NOSVE-NEXT: fcvt h0, s0
259 ; NONEON-NOSVE-NEXT: str h0, [sp, #82]
260 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
261 ; NONEON-NOSVE-NEXT: fcvt s0, h0
262 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
263 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
264 ; NONEON-NOSVE-NEXT: fcvt s1, h1
265 ; NONEON-NOSVE-NEXT: fcvt h0, s0
266 ; NONEON-NOSVE-NEXT: str h0, [sp, #80]
267 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
268 ; NONEON-NOSVE-NEXT: fcvt s0, h0
269 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
270 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
271 ; NONEON-NOSVE-NEXT: fcvt s1, h1
272 ; NONEON-NOSVE-NEXT: fcvt h0, s0
273 ; NONEON-NOSVE-NEXT: str h0, [sp, #78]
274 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
275 ; NONEON-NOSVE-NEXT: fcvt s0, h0
276 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
277 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
278 ; NONEON-NOSVE-NEXT: fcvt s1, h1
279 ; NONEON-NOSVE-NEXT: fcvt h0, s0
280 ; NONEON-NOSVE-NEXT: str h0, [sp, #76]
281 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
282 ; NONEON-NOSVE-NEXT: fcvt s0, h0
283 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
284 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
285 ; NONEON-NOSVE-NEXT: fcvt s1, h1
286 ; NONEON-NOSVE-NEXT: fcvt h0, s0
287 ; NONEON-NOSVE-NEXT: str h0, [sp, #74]
288 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
289 ; NONEON-NOSVE-NEXT: fcvt s0, h0
290 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
291 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
292 ; NONEON-NOSVE-NEXT: fcvt s1, h1
293 ; NONEON-NOSVE-NEXT: fcvt h0, s0
294 ; NONEON-NOSVE-NEXT: str h0, [sp, #72]
295 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
296 ; NONEON-NOSVE-NEXT: fcvt s0, h0
297 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
298 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
299 ; NONEON-NOSVE-NEXT: fcvt s1, h1
300 ; NONEON-NOSVE-NEXT: fcvt h0, s0
301 ; NONEON-NOSVE-NEXT: str h0, [sp, #70]
302 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
303 ; NONEON-NOSVE-NEXT: fcvt s0, h0
304 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
305 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
306 ; NONEON-NOSVE-NEXT: fcvt s1, h1
307 ; NONEON-NOSVE-NEXT: fcvt h0, s0
308 ; NONEON-NOSVE-NEXT: str h0, [sp, #68]
309 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
310 ; NONEON-NOSVE-NEXT: fcvt s0, h0
311 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
312 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
313 ; NONEON-NOSVE-NEXT: fcvt s1, h1
314 ; NONEON-NOSVE-NEXT: fcvt h0, s0
315 ; NONEON-NOSVE-NEXT: str h0, [sp, #66]
316 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
317 ; NONEON-NOSVE-NEXT: fcvt s0, h0
318 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
319 ; NONEON-NOSVE-NEXT: fcvt h0, s0
320 ; NONEON-NOSVE-NEXT: str h0, [sp, #64]
321 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
322 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
323 ; NONEON-NOSVE-NEXT: add sp, sp, #96
324 ; NONEON-NOSVE-NEXT: ret
325 %op1 = load <16 x half>, ptr %a
326 %op2 = load <16 x half>, ptr %b
327 %res = fadd <16 x half> %op1, %op2
328 store <16 x half> %res, ptr %a
; fadd of two <2 x float> register arguments.
; CHECK prefix: one predicated "fadd z0.s" under "ptrue p0.s, vl2".
; NONEON-NOSVE prefix: operands are stored to a 32-byte stack frame, the two lanes are added with
; scalar "fadd s" instructions, and the pair is stored and reloaded into d0.
332 define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) {
333 ; CHECK-LABEL: fadd_v2f32:
335 ; CHECK-NEXT: ptrue p0.s, vl2
336 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
337 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
338 ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
339 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
342 ; NONEON-NOSVE-LABEL: fadd_v2f32:
343 ; NONEON-NOSVE: // %bb.0:
344 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
345 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
346 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
347 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
348 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
349 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
350 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
351 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
352 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
353 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
354 ; NONEON-NOSVE-NEXT: add sp, sp, #32
355 ; NONEON-NOSVE-NEXT: ret
356 %res = fadd <2 x float> %op1, %op2
; fadd of two <4 x float> register arguments.
; CHECK prefix: one predicated "fadd z0.s" under "ptrue p0.s, vl4".
; NONEON-NOSVE prefix: q0/q1 are pushed with a pre-indexed stp (48-byte frame), the four lanes are added
; pairwise with scalar "fadd s" instructions, and the result is reloaded into q0 from [sp, #32].
360 define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) {
361 ; CHECK-LABEL: fadd_v4f32:
363 ; CHECK-NEXT: ptrue p0.s, vl4
364 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
365 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
366 ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
367 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
370 ; NONEON-NOSVE-LABEL: fadd_v4f32:
371 ; NONEON-NOSVE: // %bb.0:
372 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
373 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
374 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
375 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
376 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
377 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
378 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
379 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
380 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40]
381 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
382 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
383 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
384 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
385 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32]
386 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
387 ; NONEON-NOSVE-NEXT: add sp, sp, #48
388 ; NONEON-NOSVE-NEXT: ret
389 %res = fadd <4 x float> %op1, %op2
; Loads <8 x float> from %a and %b, adds them, and stores the result back to %a.
; CHECK prefix: two q-register halves, each with a predicated "fadd z.s" under "ptrue p0.s, vl4"
; (the second one preceded by movprfx), stored with a single stp.
; NONEON-NOSVE prefix: both inputs are copied into a 96-byte stack frame, the eight lanes are added with
; scalar "fadd s" instructions, and the result is reloaded from [sp, #64] and stored to [x0].
393 define void @fadd_v8f32(ptr %a, ptr %b) {
394 ; CHECK-LABEL: fadd_v8f32:
396 ; CHECK-NEXT: ldp q0, q3, [x1]
397 ; CHECK-NEXT: ptrue p0.s, vl4
398 ; CHECK-NEXT: ldp q1, q2, [x0]
399 ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
400 ; CHECK-NEXT: movprfx z1, z2
401 ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s
402 ; CHECK-NEXT: stp q0, q1, [x0]
405 ; NONEON-NOSVE-LABEL: fadd_v8f32:
406 ; NONEON-NOSVE: // %bb.0:
407 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
408 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
409 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
410 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
411 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
412 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
413 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40]
414 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #60]
415 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
416 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #56]
417 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
418 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32]
419 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88]
420 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #52]
421 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
422 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
423 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
424 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
425 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80]
426 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
427 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
428 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
429 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
430 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
431 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72]
432 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
433 ; NONEON-NOSVE-NEXT: fadd s3, s2, s0
434 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
435 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
436 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64]
437 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
438 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
439 ; NONEON-NOSVE-NEXT: add sp, sp, #96
440 ; NONEON-NOSVE-NEXT: ret
441 %op1 = load <8 x float>, ptr %a
442 %op2 = load <8 x float>, ptr %b
443 %res = fadd <8 x float> %op1, %op2
444 store <8 x float> %res, ptr %a
; fadd of two <2 x double> register arguments.
; CHECK prefix: one predicated "fadd z0.d" under "ptrue p0.d, vl2".
; NONEON-NOSVE prefix: q0/q1 are pushed with a pre-indexed stp (48-byte frame), the two lanes are added
; with scalar "fadd d" instructions, and the result is reloaded into q0 from [sp, #32].
448 define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) {
449 ; CHECK-LABEL: fadd_v2f64:
451 ; CHECK-NEXT: ptrue p0.d, vl2
452 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
453 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
454 ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
455 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
458 ; NONEON-NOSVE-LABEL: fadd_v2f64:
459 ; NONEON-NOSVE: // %bb.0:
460 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
461 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
462 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
463 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
464 ; NONEON-NOSVE-NEXT: fadd d3, d2, d0
465 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
466 ; NONEON-NOSVE-NEXT: fadd d0, d1, d0
467 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32]
468 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
469 ; NONEON-NOSVE-NEXT: add sp, sp, #48
470 ; NONEON-NOSVE-NEXT: ret
471 %res = fadd <2 x double> %op1, %op2
472 ret <2 x double> %res
; Loads <4 x double> from %a and %b, adds them, and stores the result back to %a.
; CHECK prefix: two q-register halves, each with a predicated "fadd z.d" under "ptrue p0.d, vl2"
; (the second one preceded by movprfx), stored with a single stp.
; NONEON-NOSVE prefix: both inputs are copied into a 96-byte stack frame, the four lanes are added with
; scalar "fadd d" instructions, and the result is reloaded from [sp, #64] and stored to [x0].
475 define void @fadd_v4f64(ptr %a, ptr %b) {
476 ; CHECK-LABEL: fadd_v4f64:
478 ; CHECK-NEXT: ldp q0, q3, [x1]
479 ; CHECK-NEXT: ptrue p0.d, vl2
480 ; CHECK-NEXT: ldp q1, q2, [x0]
481 ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
482 ; CHECK-NEXT: movprfx z1, z2
483 ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d
484 ; CHECK-NEXT: stp q0, q1, [x0]
487 ; NONEON-NOSVE-LABEL: fadd_v4f64:
488 ; NONEON-NOSVE: // %bb.0:
489 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
490 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
491 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
492 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
493 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
494 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
495 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32]
496 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
497 ; NONEON-NOSVE-NEXT: fadd d3, d2, d0
498 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
499 ; NONEON-NOSVE-NEXT: fadd d0, d1, d0
500 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
501 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80]
502 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
503 ; NONEON-NOSVE-NEXT: fadd d3, d2, d0
504 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
505 ; NONEON-NOSVE-NEXT: fadd d0, d1, d0
506 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64]
507 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
508 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
509 ; NONEON-NOSVE-NEXT: add sp, sp, #96
510 ; NONEON-NOSVE-NEXT: ret
511 %op1 = load <4 x double>, ptr %a
512 %op2 = load <4 x double>, ptr %b
513 %res = fadd <4 x double> %op1, %op2
514 store <4 x double> %res, ptr %a
; fdiv of two <2 x half> register arguments (%op1 / %op2).
; CHECK prefix (the +sve and +sme RUN lines): one predicated "fdiv z0.h" under "ptrue p0.h, vl4".
; NONEON-NOSVE prefix: both operands are stored to a 32-byte stack frame and four half lanes are
; handled one at a time (ldr h, fcvt to s, scalar fdiv, fcvt back to h, str h); the result is reloaded into d0.
522 define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) {
523 ; CHECK-LABEL: fdiv_v2f16:
525 ; CHECK-NEXT: ptrue p0.h, vl4
526 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
527 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
528 ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
529 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
532 ; NONEON-NOSVE-LABEL: fdiv_v2f16:
533 ; NONEON-NOSVE: // %bb.0:
534 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
535 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
536 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
537 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
538 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
539 ; NONEON-NOSVE-NEXT: fcvt s0, h0
540 ; NONEON-NOSVE-NEXT: fcvt s1, h1
541 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
542 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
543 ; NONEON-NOSVE-NEXT: fcvt s1, h1
544 ; NONEON-NOSVE-NEXT: fcvt h0, s0
545 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
546 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
547 ; NONEON-NOSVE-NEXT: fcvt s0, h0
548 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
549 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
550 ; NONEON-NOSVE-NEXT: fcvt s1, h1
551 ; NONEON-NOSVE-NEXT: fcvt h0, s0
552 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
553 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
554 ; NONEON-NOSVE-NEXT: fcvt s0, h0
555 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
556 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
557 ; NONEON-NOSVE-NEXT: fcvt s1, h1
558 ; NONEON-NOSVE-NEXT: fcvt h0, s0
559 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
560 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
561 ; NONEON-NOSVE-NEXT: fcvt s0, h0
562 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
563 ; NONEON-NOSVE-NEXT: fcvt h0, s0
564 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
565 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
566 ; NONEON-NOSVE-NEXT: add sp, sp, #32
567 ; NONEON-NOSVE-NEXT: ret
568 %res = fdiv <2 x half> %op1, %op2
; fdiv of two <4 x half> register arguments (%op1 / %op2).
; CHECK prefix: one predicated "fdiv z0.h" under "ptrue p0.h, vl4".
; NONEON-NOSVE prefix: operands are stored to a 32-byte stack frame and each of the four half lanes is
; converted to single precision, divided with a scalar fdiv, converted back and stored; result reloaded into d0.
572 define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> %op2) {
573 ; CHECK-LABEL: fdiv_v4f16:
575 ; CHECK-NEXT: ptrue p0.h, vl4
576 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
577 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
578 ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
579 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
582 ; NONEON-NOSVE-LABEL: fdiv_v4f16:
583 ; NONEON-NOSVE: // %bb.0:
584 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
585 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
586 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
587 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
588 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
589 ; NONEON-NOSVE-NEXT: fcvt s0, h0
590 ; NONEON-NOSVE-NEXT: fcvt s1, h1
591 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
592 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
593 ; NONEON-NOSVE-NEXT: fcvt s1, h1
594 ; NONEON-NOSVE-NEXT: fcvt h0, s0
595 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
596 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
597 ; NONEON-NOSVE-NEXT: fcvt s0, h0
598 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
599 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
600 ; NONEON-NOSVE-NEXT: fcvt s1, h1
601 ; NONEON-NOSVE-NEXT: fcvt h0, s0
602 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
603 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
604 ; NONEON-NOSVE-NEXT: fcvt s0, h0
605 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
606 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
607 ; NONEON-NOSVE-NEXT: fcvt s1, h1
608 ; NONEON-NOSVE-NEXT: fcvt h0, s0
609 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
610 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
611 ; NONEON-NOSVE-NEXT: fcvt s0, h0
612 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
613 ; NONEON-NOSVE-NEXT: fcvt h0, s0
614 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
615 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
616 ; NONEON-NOSVE-NEXT: add sp, sp, #32
617 ; NONEON-NOSVE-NEXT: ret
618 %res = fdiv <4 x half> %op1, %op2
; fdiv of two <8 x half> register arguments (%op1 / %op2).
; CHECK prefix: one predicated "fdiv z0.h" under "ptrue p0.h, vl8".
; NONEON-NOSVE prefix: q0/q1 are pushed with a pre-indexed stp (48-byte frame) and the eight half lanes
; are divided one at a time through single precision; the result is reloaded into q0 from [sp, #32].
622 define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) {
623 ; CHECK-LABEL: fdiv_v8f16:
625 ; CHECK-NEXT: ptrue p0.h, vl8
626 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
627 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
628 ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
629 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
632 ; NONEON-NOSVE-LABEL: fdiv_v8f16:
633 ; NONEON-NOSVE: // %bb.0:
634 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
635 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
636 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
637 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
638 ; NONEON-NOSVE-NEXT: fcvt s0, h0
639 ; NONEON-NOSVE-NEXT: fcvt s1, h1
640 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
641 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
642 ; NONEON-NOSVE-NEXT: fcvt s1, h1
643 ; NONEON-NOSVE-NEXT: fcvt h0, s0
644 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
645 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
646 ; NONEON-NOSVE-NEXT: fcvt s0, h0
647 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
648 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
649 ; NONEON-NOSVE-NEXT: fcvt s1, h1
650 ; NONEON-NOSVE-NEXT: fcvt h0, s0
651 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
652 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
653 ; NONEON-NOSVE-NEXT: fcvt s0, h0
654 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
655 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
656 ; NONEON-NOSVE-NEXT: fcvt s1, h1
657 ; NONEON-NOSVE-NEXT: fcvt h0, s0
658 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
659 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
660 ; NONEON-NOSVE-NEXT: fcvt s0, h0
661 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
662 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
663 ; NONEON-NOSVE-NEXT: fcvt s1, h1
664 ; NONEON-NOSVE-NEXT: fcvt h0, s0
665 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
666 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
667 ; NONEON-NOSVE-NEXT: fcvt s0, h0
668 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
669 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
670 ; NONEON-NOSVE-NEXT: fcvt s1, h1
671 ; NONEON-NOSVE-NEXT: fcvt h0, s0
672 ; NONEON-NOSVE-NEXT: str h0, [sp, #38]
673 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
674 ; NONEON-NOSVE-NEXT: fcvt s0, h0
675 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
676 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
677 ; NONEON-NOSVE-NEXT: fcvt s1, h1
678 ; NONEON-NOSVE-NEXT: fcvt h0, s0
679 ; NONEON-NOSVE-NEXT: str h0, [sp, #36]
680 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
681 ; NONEON-NOSVE-NEXT: fcvt s0, h0
682 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
683 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
684 ; NONEON-NOSVE-NEXT: fcvt s1, h1
685 ; NONEON-NOSVE-NEXT: fcvt h0, s0
686 ; NONEON-NOSVE-NEXT: str h0, [sp, #34]
687 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
688 ; NONEON-NOSVE-NEXT: fcvt s0, h0
689 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
690 ; NONEON-NOSVE-NEXT: fcvt h0, s0
691 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
692 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
693 ; NONEON-NOSVE-NEXT: add sp, sp, #48
694 ; NONEON-NOSVE-NEXT: ret
695 %res = fdiv <8 x half> %op1, %op2
; Loads <16 x half> from %a and %b, divides %a's data by %b's, and stores the result back to %a.
; CHECK prefix: two q-register halves under "ptrue p0.h, vl8"; the first half uses the reversed-operand
; form "fdivr z0.h" (z0 holds data loaded from x1, z1 data loaded from x0), the second uses movprfx + "fdiv z1.h".
; NONEON-NOSVE prefix: both inputs are copied into a 96-byte stack frame and the sixteen half lanes are
; divided one at a time through single precision; the result is reloaded from [sp, #64] and stored to [x0].
699 define void @fdiv_v16f16(ptr %a, ptr %b) {
700 ; CHECK-LABEL: fdiv_v16f16:
702 ; CHECK-NEXT: ldp q0, q3, [x1]
703 ; CHECK-NEXT: ptrue p0.h, vl8
704 ; CHECK-NEXT: ldp q1, q2, [x0]
705 ; CHECK-NEXT: fdivr z0.h, p0/m, z0.h, z1.h
706 ; CHECK-NEXT: movprfx z1, z2
707 ; CHECK-NEXT: fdiv z1.h, p0/m, z1.h, z3.h
708 ; CHECK-NEXT: stp q0, q1, [x0]
711 ; NONEON-NOSVE-LABEL: fdiv_v16f16:
712 ; NONEON-NOSVE: // %bb.0:
713 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
714 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
715 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
716 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
717 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
718 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
719 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
720 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #46]
721 ; NONEON-NOSVE-NEXT: fcvt s0, h0
722 ; NONEON-NOSVE-NEXT: fcvt s1, h1
723 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
724 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #44]
725 ; NONEON-NOSVE-NEXT: fcvt s1, h1
726 ; NONEON-NOSVE-NEXT: fcvt h0, s0
727 ; NONEON-NOSVE-NEXT: str h0, [sp, #94]
728 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
729 ; NONEON-NOSVE-NEXT: fcvt s0, h0
730 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
731 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #42]
732 ; NONEON-NOSVE-NEXT: fcvt s1, h1
733 ; NONEON-NOSVE-NEXT: fcvt h0, s0
734 ; NONEON-NOSVE-NEXT: str h0, [sp, #92]
735 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
736 ; NONEON-NOSVE-NEXT: fcvt s0, h0
737 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
738 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #40]
739 ; NONEON-NOSVE-NEXT: fcvt s1, h1
740 ; NONEON-NOSVE-NEXT: fcvt h0, s0
741 ; NONEON-NOSVE-NEXT: str h0, [sp, #90]
742 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
743 ; NONEON-NOSVE-NEXT: fcvt s0, h0
744 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
745 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #38]
746 ; NONEON-NOSVE-NEXT: fcvt s1, h1
747 ; NONEON-NOSVE-NEXT: fcvt h0, s0
748 ; NONEON-NOSVE-NEXT: str h0, [sp, #88]
749 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
750 ; NONEON-NOSVE-NEXT: fcvt s0, h0
751 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
752 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #36]
753 ; NONEON-NOSVE-NEXT: fcvt s1, h1
754 ; NONEON-NOSVE-NEXT: fcvt h0, s0
755 ; NONEON-NOSVE-NEXT: str h0, [sp, #86]
756 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
757 ; NONEON-NOSVE-NEXT: fcvt s0, h0
758 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
759 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #34]
760 ; NONEON-NOSVE-NEXT: fcvt s1, h1
761 ; NONEON-NOSVE-NEXT: fcvt h0, s0
762 ; NONEON-NOSVE-NEXT: str h0, [sp, #84]
763 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
764 ; NONEON-NOSVE-NEXT: fcvt s0, h0
765 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
766 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
767 ; NONEON-NOSVE-NEXT: fcvt s1, h1
768 ; NONEON-NOSVE-NEXT: fcvt h0, s0
769 ; NONEON-NOSVE-NEXT: str h0, [sp, #82]
770 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
771 ; NONEON-NOSVE-NEXT: fcvt s0, h0
772 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
773 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
774 ; NONEON-NOSVE-NEXT: fcvt s1, h1
775 ; NONEON-NOSVE-NEXT: fcvt h0, s0
776 ; NONEON-NOSVE-NEXT: str h0, [sp, #80]
777 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
778 ; NONEON-NOSVE-NEXT: fcvt s0, h0
779 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
780 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
781 ; NONEON-NOSVE-NEXT: fcvt s1, h1
782 ; NONEON-NOSVE-NEXT: fcvt h0, s0
783 ; NONEON-NOSVE-NEXT: str h0, [sp, #78]
784 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
785 ; NONEON-NOSVE-NEXT: fcvt s0, h0
786 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
787 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
788 ; NONEON-NOSVE-NEXT: fcvt s1, h1
789 ; NONEON-NOSVE-NEXT: fcvt h0, s0
790 ; NONEON-NOSVE-NEXT: str h0, [sp, #76]
791 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
792 ; NONEON-NOSVE-NEXT: fcvt s0, h0
793 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
794 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
795 ; NONEON-NOSVE-NEXT: fcvt s1, h1
796 ; NONEON-NOSVE-NEXT: fcvt h0, s0
797 ; NONEON-NOSVE-NEXT: str h0, [sp, #74]
798 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
799 ; NONEON-NOSVE-NEXT: fcvt s0, h0
800 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
801 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
802 ; NONEON-NOSVE-NEXT: fcvt s1, h1
803 ; NONEON-NOSVE-NEXT: fcvt h0, s0
804 ; NONEON-NOSVE-NEXT: str h0, [sp, #72]
805 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
806 ; NONEON-NOSVE-NEXT: fcvt s0, h0
807 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
808 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
809 ; NONEON-NOSVE-NEXT: fcvt s1, h1
810 ; NONEON-NOSVE-NEXT: fcvt h0, s0
811 ; NONEON-NOSVE-NEXT: str h0, [sp, #70]
812 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
813 ; NONEON-NOSVE-NEXT: fcvt s0, h0
814 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
815 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
816 ; NONEON-NOSVE-NEXT: fcvt s1, h1
817 ; NONEON-NOSVE-NEXT: fcvt h0, s0
818 ; NONEON-NOSVE-NEXT: str h0, [sp, #68]
819 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
820 ; NONEON-NOSVE-NEXT: fcvt s0, h0
821 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
822 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
823 ; NONEON-NOSVE-NEXT: fcvt s1, h1
824 ; NONEON-NOSVE-NEXT: fcvt h0, s0
825 ; NONEON-NOSVE-NEXT: str h0, [sp, #66]
826 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
827 ; NONEON-NOSVE-NEXT: fcvt s0, h0
828 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
829 ; NONEON-NOSVE-NEXT: fcvt h0, s0
830 ; NONEON-NOSVE-NEXT: str h0, [sp, #64]
831 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
832 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
833 ; NONEON-NOSVE-NEXT: add sp, sp, #96
834 ; NONEON-NOSVE-NEXT: ret
835 %op1 = load <16 x half>, ptr %a
836 %op2 = load <16 x half>, ptr %b
837 %res = fdiv <16 x half> %op1, %op2
838 store <16 x half> %res, ptr %a
; fdiv of two <2 x float> register arguments (%op1 / %op2).
; CHECK prefix: one predicated "fdiv z0.s" under "ptrue p0.s, vl2".
; NONEON-NOSVE prefix: operands are stored to a 32-byte stack frame, the two lanes are divided with
; scalar "fdiv s" instructions, and the pair is stored and reloaded into d0.
842 define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
843 ; CHECK-LABEL: fdiv_v2f32:
845 ; CHECK-NEXT: ptrue p0.s, vl2
846 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
847 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
848 ; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
849 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
852 ; NONEON-NOSVE-LABEL: fdiv_v2f32:
853 ; NONEON-NOSVE: // %bb.0:
854 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
855 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
856 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
857 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
858 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
859 ; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
860 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
861 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
862 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
863 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
864 ; NONEON-NOSVE-NEXT: add sp, sp, #32
865 ; NONEON-NOSVE-NEXT: ret
866 %res = fdiv <2 x float> %op1, %op2
; fdiv of two <4 x float> register arguments (%op1 / %op2).
; CHECK prefix: one predicated "fdiv z0.s" under "ptrue p0.s, vl4".
; NONEON-NOSVE prefix: q0/q1 are pushed with a pre-indexed stp (48-byte frame), the four lanes are divided
; pairwise with scalar "fdiv s" instructions, and the result is reloaded into q0 from [sp, #32].
870 define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) {
871 ; CHECK-LABEL: fdiv_v4f32:
873 ; CHECK-NEXT: ptrue p0.s, vl4
874 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
875 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
876 ; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
877 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
880 ; NONEON-NOSVE-LABEL: fdiv_v4f32:
881 ; NONEON-NOSVE: // %bb.0:
882 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
883 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
884 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
885 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
886 ; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
887 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
888 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
889 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
890 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40]
891 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
892 ; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
893 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
894 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
895 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32]
896 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
897 ; NONEON-NOSVE-NEXT: add sp, sp, #48
898 ; NONEON-NOSVE-NEXT: ret
899 %res = fdiv <4 x float> %op1, %op2
; fdiv of <8 x float> values loaded from %a and %b; the quotient is stored
; back to %a.
; SVE/SME runs (CHECK prefix) expect the vector to be handled as two
; q-register halves under a vl4 predicate: fdivr for one half and
; movprfx + fdiv for the other. The NONEON-NOSVE run expects the operands
; to be copied to the stack and divided with eight scalar fdiv instructions.
903 define void @fdiv_v8f32(ptr %a, ptr %b) {
904 ; CHECK-LABEL: fdiv_v8f32:
906 ; CHECK-NEXT: ldp q0, q3, [x1]
907 ; CHECK-NEXT: ptrue p0.s, vl4
908 ; CHECK-NEXT: ldp q1, q2, [x0]
909 ; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z1.s
910 ; CHECK-NEXT: movprfx z1, z2
911 ; CHECK-NEXT: fdiv z1.s, p0/m, z1.s, z3.s
912 ; CHECK-NEXT: stp q0, q1, [x0]
915 ; NONEON-NOSVE-LABEL: fdiv_v8f32:
916 ; NONEON-NOSVE: // %bb.0:
917 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
918 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
919 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
920 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
921 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
922 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
923 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40]
924 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #60]
925 ; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
926 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #56]
927 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
928 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32]
929 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88]
930 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #52]
931 ; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
932 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
933 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
934 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
935 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80]
936 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
937 ; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
938 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
939 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
940 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
941 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72]
942 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
943 ; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
944 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
945 ; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
946 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64]
947 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
948 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
949 ; NONEON-NOSVE-NEXT: add sp, sp, #96
950 ; NONEON-NOSVE-NEXT: ret
951 %op1 = load <8 x float>, ptr %a
952 %op2 = load <8 x float>, ptr %b
953 %res = fdiv <8 x float> %op1, %op2
954 store <8 x float> %res, ptr %a
; fdiv of two <2 x double> arguments passed by value.
; SVE/SME runs (CHECK prefix) expect a single predicated fdiv on .d lanes
; under a vl2 predicate. The NONEON-NOSVE run expects both operands to be
; pushed to the stack and divided with two scalar fdiv on d registers.
958 define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) {
959 ; CHECK-LABEL: fdiv_v2f64:
961 ; CHECK-NEXT: ptrue p0.d, vl2
962 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
963 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
964 ; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
965 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
968 ; NONEON-NOSVE-LABEL: fdiv_v2f64:
969 ; NONEON-NOSVE: // %bb.0:
970 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
971 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
972 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
973 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
974 ; NONEON-NOSVE-NEXT: fdiv d3, d2, d0
975 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
976 ; NONEON-NOSVE-NEXT: fdiv d0, d1, d0
977 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32]
978 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
979 ; NONEON-NOSVE-NEXT: add sp, sp, #48
980 ; NONEON-NOSVE-NEXT: ret
981 %res = fdiv <2 x double> %op1, %op2
982 ret <2 x double> %res
; fdiv of <4 x double> values loaded from %a and %b; the quotient is stored
; back to %a.
; SVE/SME runs (CHECK prefix) expect the vector to be handled as two
; q-register halves under a vl2 predicate: fdivr for one half and
; movprfx + fdiv for the other. The NONEON-NOSVE run expects the operands
; to be copied to the stack and divided with four scalar fdiv on d registers.
985 define void @fdiv_v4f64(ptr %a, ptr %b) {
986 ; CHECK-LABEL: fdiv_v4f64:
988 ; CHECK-NEXT: ldp q0, q3, [x1]
989 ; CHECK-NEXT: ptrue p0.d, vl2
990 ; CHECK-NEXT: ldp q1, q2, [x0]
991 ; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d
992 ; CHECK-NEXT: movprfx z1, z2
993 ; CHECK-NEXT: fdiv z1.d, p0/m, z1.d, z3.d
994 ; CHECK-NEXT: stp q0, q1, [x0]
997 ; NONEON-NOSVE-LABEL: fdiv_v4f64:
998 ; NONEON-NOSVE: // %bb.0:
999 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
1000 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
1001 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
1002 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
1003 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
1004 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
1005 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32]
1006 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
1007 ; NONEON-NOSVE-NEXT: fdiv d3, d2, d0
1008 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
1009 ; NONEON-NOSVE-NEXT: fdiv d0, d1, d0
1010 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
1011 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80]
1012 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1013 ; NONEON-NOSVE-NEXT: fdiv d3, d2, d0
1014 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
1015 ; NONEON-NOSVE-NEXT: fdiv d0, d1, d0
1016 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64]
1017 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
1018 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
1019 ; NONEON-NOSVE-NEXT: add sp, sp, #96
1020 ; NONEON-NOSVE-NEXT: ret
1021 %op1 = load <4 x double>, ptr %a
1022 %op2 = load <4 x double>, ptr %b
1023 %res = fdiv <4 x double> %op1, %op2
1024 store <4 x double> %res, ptr %a
; llvm.fma on three <2 x half> arguments passed by value.
; SVE/SME runs (CHECK prefix) expect a single predicated fmad on .h lanes
; under a vl4 predicate. The NONEON-NOSVE run expects the operands to be
; stored to the stack and each half lane to be widened to single precision
; with fcvt, combined with scalar fmadd, and narrowed back with fcvt; the
; expected sequence covers four half lanes.
1032 define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3) {
1033 ; CHECK-LABEL: fma_v2f16:
1035 ; CHECK-NEXT: ptrue p0.h, vl4
1036 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1037 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
1038 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
1039 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
1040 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1043 ; NONEON-NOSVE-LABEL: fma_v2f16:
1044 ; NONEON-NOSVE: // %bb.0:
1045 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
1046 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1047 ; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8]
1048 ; NONEON-NOSVE-NEXT: str d0, [sp]
1049 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
1050 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1051 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
1052 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1053 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1054 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1055 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1056 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1057 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
1058 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1059 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1060 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1061 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
1062 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
1063 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1064 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1065 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1066 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
1067 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1068 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1069 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1070 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
1071 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1072 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1073 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1074 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1075 ; NONEON-NOSVE-NEXT: ldr h2, [sp]
1076 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1077 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1078 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1079 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
1080 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
1081 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1082 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1083 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1084 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
1085 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1086 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1087 ; NONEON-NOSVE-NEXT: ret
1088 %res = call <2 x half> @llvm.fma.v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3)
; llvm.fma on three <4 x half> arguments passed by value.
; SVE/SME runs (CHECK prefix) expect a single predicated fmad on .h lanes
; under a vl4 predicate. The NONEON-NOSVE run expects the operands to be
; stored to the stack and each of the four lanes to be widened with fcvt,
; combined with scalar fmadd in single precision, and narrowed back.
1092 define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) {
1093 ; CHECK-LABEL: fma_v4f16:
1095 ; CHECK-NEXT: ptrue p0.h, vl4
1096 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1097 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
1098 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
1099 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
1100 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1103 ; NONEON-NOSVE-LABEL: fma_v4f16:
1104 ; NONEON-NOSVE: // %bb.0:
1105 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
1106 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1107 ; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8]
1108 ; NONEON-NOSVE-NEXT: str d0, [sp]
1109 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
1110 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1111 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
1112 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1113 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1114 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1115 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1116 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1117 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
1118 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1119 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1120 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1121 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
1122 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
1123 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1124 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1125 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1126 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
1127 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1128 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1129 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1130 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
1131 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1132 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1133 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1134 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1135 ; NONEON-NOSVE-NEXT: ldr h2, [sp]
1136 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1137 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1138 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1139 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
1140 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
1141 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1142 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1143 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1144 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
1145 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1146 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1147 ; NONEON-NOSVE-NEXT: ret
1148 %res = call <4 x half> @llvm.fma.v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
; llvm.fma on three <8 x half> arguments passed by value.
; SVE/SME runs (CHECK prefix) expect a single predicated fmad on .h lanes
; under a vl8 predicate. The NONEON-NOSVE run expects the operands to be
; stored to a 64-byte stack frame and each of the eight lanes to be widened
; with fcvt, combined with scalar fmadd in single precision, and narrowed
; back before the result is reloaded as q0.
1152 define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) {
1153 ; CHECK-LABEL: fma_v8f16:
1155 ; CHECK-NEXT: ptrue p0.h, vl8
1156 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1157 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
1158 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
1159 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
1160 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1163 ; NONEON-NOSVE-LABEL: fma_v8f16:
1164 ; NONEON-NOSVE: // %bb.0:
1165 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
1166 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
1167 ; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16]
1168 ; NONEON-NOSVE-NEXT: str q0, [sp]
1169 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #46]
1170 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #30]
1171 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #14]
1172 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1173 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1174 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1175 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1176 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #28]
1177 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #12]
1178 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1179 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1180 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1181 ; NONEON-NOSVE-NEXT: str h0, [sp, #62]
1182 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #44]
1183 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1184 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1185 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #26]
1186 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #10]
1187 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1188 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1189 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1190 ; NONEON-NOSVE-NEXT: str h0, [sp, #60]
1191 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #42]
1192 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1193 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1194 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #24]
1195 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #8]
1196 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1197 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1198 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1199 ; NONEON-NOSVE-NEXT: str h0, [sp, #58]
1200 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #40]
1201 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1202 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1203 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #22]
1204 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
1205 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1206 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1207 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1208 ; NONEON-NOSVE-NEXT: str h0, [sp, #56]
1209 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #38]
1210 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1211 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1212 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #20]
1213 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
1214 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1215 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1216 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1217 ; NONEON-NOSVE-NEXT: str h0, [sp, #54]
1218 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #36]
1219 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1220 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1221 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #18]
1222 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
1223 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1224 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1225 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1226 ; NONEON-NOSVE-NEXT: str h0, [sp, #52]
1227 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #34]
1228 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1229 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1230 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #16]
1231 ; NONEON-NOSVE-NEXT: ldr h2, [sp]
1232 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1233 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1234 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1235 ; NONEON-NOSVE-NEXT: str h0, [sp, #50]
1236 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
1237 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1238 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1239 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1240 ; NONEON-NOSVE-NEXT: str h0, [sp, #48]
1241 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
1242 ; NONEON-NOSVE-NEXT: add sp, sp, #64
1243 ; NONEON-NOSVE-NEXT: ret
1244 %res = call <8 x half> @llvm.fma.v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
; llvm.fma on <16 x half> values loaded from %a, %b and %c; the result is
; stored back to %a.
; SVE/SME runs (CHECK prefix) expect the vector to be handled as two
; q-register halves under a vl8 predicate: fmad for one half and
; movprfx + fmla for the other. The NONEON-NOSVE run expects all three
; operands to be copied into a 128-byte stack frame and processed one half
; lane at a time (fcvt to single, scalar fmadd, fcvt back to half), with
; the results written to [sp, #96]..[sp, #126] and then stored to %a.
1248 define void @fma_v16f16(ptr %a, ptr %b, ptr %c) {
1249 ; CHECK-LABEL: fma_v16f16:
1251 ; CHECK-NEXT: ldp q0, q4, [x1]
1252 ; CHECK-NEXT: ptrue p0.h, vl8
1253 ; CHECK-NEXT: ldp q1, q5, [x2]
1254 ; CHECK-NEXT: ldp q2, q3, [x0]
1255 ; CHECK-NEXT: fmad z0.h, p0/m, z2.h, z1.h
1256 ; CHECK-NEXT: movprfx z1, z5
1257 ; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h
1258 ; CHECK-NEXT: stp q0, q1, [x0]
1261 ; NONEON-NOSVE-LABEL: fma_v16f16:
1262 ; NONEON-NOSVE: // %bb.0:
1263 ; NONEON-NOSVE-NEXT: sub sp, sp, #128
1264 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
1265 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x2]
1266 ; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
1267 ; NONEON-NOSVE-NEXT: ldp q4, q5, [x0]
1268 ; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64]
1269 ; NONEON-NOSVE-NEXT: stp q4, q2, [sp]
1270 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #94]
1271 ; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32]
1272 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #78]
1273 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #62]
1274 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1275 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1276 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1277 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1278 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #76]
1279 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #60]
1280 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1281 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1282 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1283 ; NONEON-NOSVE-NEXT: str h0, [sp, #126]
1284 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #92]
1285 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1286 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1287 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #74]
1288 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #58]
1289 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1290 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1291 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1292 ; NONEON-NOSVE-NEXT: str h0, [sp, #124]
1293 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #90]
1294 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1295 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1296 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #72]
1297 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #56]
1298 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1299 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1300 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1301 ; NONEON-NOSVE-NEXT: str h0, [sp, #122]
1302 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #88]
1303 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1304 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1305 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #70]
1306 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #54]
1307 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1308 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1309 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1310 ; NONEON-NOSVE-NEXT: str h0, [sp, #120]
1311 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #86]
1312 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1313 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1314 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #68]
1315 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #52]
1316 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1317 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1318 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1319 ; NONEON-NOSVE-NEXT: str h0, [sp, #118]
1320 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #84]
1321 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1322 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1323 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #66]
1324 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #50]
1325 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1326 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1327 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1328 ; NONEON-NOSVE-NEXT: str h0, [sp, #116]
1329 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #82]
1330 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1331 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1332 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #64]
1333 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #48]
1334 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1335 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1336 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1337 ; NONEON-NOSVE-NEXT: str h0, [sp, #114]
1338 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #80]
1339 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1340 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1341 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #30]
1342 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #14]
1343 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1344 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1345 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1346 ; NONEON-NOSVE-NEXT: str h0, [sp, #112]
1347 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #46]
1348 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1349 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1350 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #28]
1351 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #12]
1352 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1353 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1354 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1355 ; NONEON-NOSVE-NEXT: str h0, [sp, #110]
1356 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #44]
1357 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1358 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1359 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #26]
1360 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #10]
1361 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1362 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1363 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1364 ; NONEON-NOSVE-NEXT: str h0, [sp, #108]
1365 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #42]
1366 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1367 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1368 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #24]
1369 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #8]
1370 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1371 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1372 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1373 ; NONEON-NOSVE-NEXT: str h0, [sp, #106]
1374 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #40]
1375 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1376 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1377 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #22]
1378 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
1379 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1380 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1381 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1382 ; NONEON-NOSVE-NEXT: str h0, [sp, #104]
1383 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #38]
1384 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1385 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1386 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #20]
1387 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
1388 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1389 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1390 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1391 ; NONEON-NOSVE-NEXT: str h0, [sp, #102]
1392 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #36]
1393 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1394 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1395 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #18]
1396 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
1397 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1398 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1399 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1400 ; NONEON-NOSVE-NEXT: str h0, [sp, #100]
1401 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #34]
1402 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1403 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1404 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #16]
1405 ; NONEON-NOSVE-NEXT: ldr h2, [sp]
1406 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1407 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1408 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1409 ; NONEON-NOSVE-NEXT: str h0, [sp, #98]
1410 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
1411 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1412 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1413 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1414 ; NONEON-NOSVE-NEXT: str h0, [sp, #96]
1415 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
1416 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
1417 ; NONEON-NOSVE-NEXT: add sp, sp, #128
1418 ; NONEON-NOSVE-NEXT: ret
1419 %op1 = load <16 x half>, ptr %a
1420 %op2 = load <16 x half>, ptr %b
1421 %op3 = load <16 x half>, ptr %c
1422 %res = call <16 x half> @llvm.fma.v16f16(<16 x half> %op1, <16 x half> %op2, <16 x half> %op3)
1423 store <16 x half> %res, ptr %a
; llvm.fma on three <2 x float> arguments passed by value.
; SVE/SME runs (CHECK prefix) expect a single predicated fmad on .s lanes
; under a vl2 predicate. The NONEON-NOSVE run expects the operands to be
; stored to the stack and combined with two scalar fmadd instructions.
1427 define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) {
1428 ; CHECK-LABEL: fma_v2f32:
1430 ; CHECK-NEXT: ptrue p0.s, vl2
1431 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1432 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
1433 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
1434 ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
1435 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1438 ; NONEON-NOSVE-LABEL: fma_v2f32:
1439 ; NONEON-NOSVE: // %bb.0:
1440 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
1441 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1442 ; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8]
1443 ; NONEON-NOSVE-NEXT: str d0, [sp]
1444 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #8]
1445 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp]
1446 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
1447 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
1448 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
1449 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1450 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #24]
1451 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1452 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1453 ; NONEON-NOSVE-NEXT: ret
1454 %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3)
1455 ret <2 x float> %res
; llvm.fma on three <4 x float> arguments passed by value.
; SVE/SME runs (CHECK prefix) expect a single predicated fmad on .s lanes
; under a vl4 predicate. The NONEON-NOSVE run expects the operands to be
; stored to a 64-byte stack frame and combined with four scalar fmadd
; instructions, with the result reloaded as q0.
1458 define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) {
1459 ; CHECK-LABEL: fma_v4f32:
1461 ; CHECK-NEXT: ptrue p0.s, vl4
1462 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1463 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
1464 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
1465 ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
1466 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1469 ; NONEON-NOSVE-LABEL: fma_v4f32:
1470 ; NONEON-NOSVE: // %bb.0:
1471 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
1472 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
1473 ; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16]
1474 ; NONEON-NOSVE-NEXT: str q0, [sp]
1475 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24]
1476 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
1477 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #44]
1478 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
1479 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #40]
1480 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1481 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16]
1482 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp]
1483 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #56]
1484 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #36]
1485 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
1486 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #32]
1487 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1488 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #48]
1489 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
1490 ; NONEON-NOSVE-NEXT: add sp, sp, #64
1491 ; NONEON-NOSVE-NEXT: ret
1492 %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3)
1493 ret <4 x float> %res
; llvm.fma on <8 x float> values loaded from %a, %b and %c; the result is
; stored back to %a.
; SVE/SME runs (CHECK prefix) expect the vector to be handled as two
; q-register halves under a vl4 predicate: fmad for one half and
; movprfx + fmla for the other. The NONEON-NOSVE run expects all three
; operands to be copied into a 128-byte stack frame and combined with
; eight scalar fmadd instructions.
1496 define void @fma_v8f32(ptr %a, ptr %b, ptr %c) {
1497 ; CHECK-LABEL: fma_v8f32:
1499 ; CHECK-NEXT: ldp q0, q4, [x1]
1500 ; CHECK-NEXT: ptrue p0.s, vl4
1501 ; CHECK-NEXT: ldp q1, q5, [x2]
1502 ; CHECK-NEXT: ldp q2, q3, [x0]
1503 ; CHECK-NEXT: fmad z0.s, p0/m, z2.s, z1.s
1504 ; CHECK-NEXT: movprfx z1, z5
1505 ; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s
1506 ; CHECK-NEXT: stp q0, q1, [x0]
1509 ; NONEON-NOSVE-LABEL: fma_v8f32:
1510 ; NONEON-NOSVE: // %bb.0:
1511 ; NONEON-NOSVE-NEXT: sub sp, sp, #128
1512 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
1513 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x2]
1514 ; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
1515 ; NONEON-NOSVE-NEXT: ldp q4, q5, [x0]
1516 ; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64]
1517 ; NONEON-NOSVE-NEXT: stp q4, q2, [sp]
1518 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #92]
1519 ; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32]
1520 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #72]
1521 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #56]
1522 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
1523 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #88]
1524 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1525 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #64]
1526 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #48]
1527 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #120]
1528 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #84]
1529 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
1530 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #80]
1531 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1532 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24]
1533 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
1534 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #112]
1535 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #44]
1536 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
1537 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #40]
1538 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1539 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16]
1540 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp]
1541 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #104]
1542 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #36]
1543 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
1544 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #32]
1545 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
1546 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #96]
1547 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
1548 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
1549 ; NONEON-NOSVE-NEXT: add sp, sp, #128
1550 ; NONEON-NOSVE-NEXT: ret
1551 %op1 = load <8 x float>, ptr %a
1552 %op2 = load <8 x float>, ptr %b
1553 %op3 = load <8 x float>, ptr %c
1554 %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %op1, <8 x float> %op2, <8 x float> %op3)
1555 store <8 x float> %res, ptr %a
; llvm.fma on three <2 x double> arguments passed by value.
; SVE/SME runs (CHECK prefix) expect a single predicated fmad on .d lanes
; under a vl2 predicate. The NONEON-NOSVE run expects the operands to be
; stored to a 64-byte stack frame and combined with two scalar fmadd
; instructions on d registers.
1559 define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) {
1560 ; CHECK-LABEL: fma_v2f64:
1562 ; CHECK-NEXT: ptrue p0.d, vl2
1563 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1564 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
1565 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
1566 ; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
1567 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1570 ; NONEON-NOSVE-LABEL: fma_v2f64:
1571 ; NONEON-NOSVE: // %bb.0:
1572 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
1573 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
1574 ; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16]
1575 ; NONEON-NOSVE-NEXT: str q0, [sp]
1576 ; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16]
1577 ; NONEON-NOSVE-NEXT: ldp d2, d4, [sp]
1578 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
1579 ; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0
1580 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
1581 ; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
1582 ; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #48]
1583 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
1584 ; NONEON-NOSVE-NEXT: add sp, sp, #64
1585 ; NONEON-NOSVE-NEXT: ret
1586 %res = call <2 x double> @llvm.fma.v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3)
1587 ret <2 x double> %res
; llvm.fma on <4 x double> values loaded from %a, %b and %c; the result is
; stored back to %a.
; SVE/SME runs (CHECK prefix) expect the vector to be handled as two
; q-register halves under a vl2 predicate: fmad for one half and
; movprfx + fmla for the other. The NONEON-NOSVE run expects all three
; operands to be copied into a 128-byte stack frame and combined with four
; scalar fmadd instructions on d registers.
1590 define void @fma_v4f64(ptr %a, ptr %b, ptr %c) {
1591 ; CHECK-LABEL: fma_v4f64:
1593 ; CHECK-NEXT: ldp q0, q4, [x1]
1594 ; CHECK-NEXT: ptrue p0.d, vl2
1595 ; CHECK-NEXT: ldp q1, q5, [x2]
1596 ; CHECK-NEXT: ldp q2, q3, [x0]
1597 ; CHECK-NEXT: fmad z0.d, p0/m, z2.d, z1.d
1598 ; CHECK-NEXT: movprfx z1, z5
1599 ; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d
1600 ; CHECK-NEXT: stp q0, q1, [x0]
1603 ; NONEON-NOSVE-LABEL: fma_v4f64:
1604 ; NONEON-NOSVE: // %bb.0:
1605 ; NONEON-NOSVE-NEXT: sub sp, sp, #128
1606 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
1607 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x2]
1608 ; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
1609 ; NONEON-NOSVE-NEXT: ldp q4, q5, [x0]
1610 ; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64]
1611 ; NONEON-NOSVE-NEXT: stp q4, q2, [sp]
1612 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #88]
1613 ; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32]
1614 ; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #64]
1615 ; NONEON-NOSVE-NEXT: ldp d2, d4, [sp, #48]
1616 ; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0
1617 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #80]
1618 ; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
1619 ; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16]
1620 ; NONEON-NOSVE-NEXT: ldp d2, d4, [sp]
1621 ; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #112]
1622 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
1623 ; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0
1624 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
1625 ; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
1626 ; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #96]
1627 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
1628 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
1629 ; NONEON-NOSVE-NEXT: add sp, sp, #128
1630 ; NONEON-NOSVE-NEXT: ret
1631 %op1 = load <4 x double>, ptr %a
1632 %op2 = load <4 x double>, ptr %b
1633 %op3 = load <4 x double>, ptr %c
1634 %res = call <4 x double> @llvm.fma.v4f64(<4 x double> %op1, <4 x double> %op2, <4 x double> %op3)
1635 store <4 x double> %res, ptr %a
; fmul of two <2 x half> arguments passed by value.
; SVE/SME runs (CHECK prefix) expect a single predicated fmul on .h lanes
; under a vl4 predicate. The NONEON-NOSVE run expects the operands to be
; stored to the stack and each half lane to be widened to single precision
; with fcvt, multiplied with scalar fmul, and narrowed back with fcvt; the
; expected sequence covers four half lanes.
1643 define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) {
1644 ; CHECK-LABEL: fmul_v2f16:
1646 ; CHECK-NEXT: ptrue p0.h, vl4
1647 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1648 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
1649 ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
1650 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1653 ; NONEON-NOSVE-LABEL: fmul_v2f16:
1654 ; NONEON-NOSVE: // %bb.0:
1655 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
1656 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1657 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
1658 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
1659 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1660 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1661 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1662 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1663 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1664 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1665 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1666 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
1667 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
1668 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1669 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1670 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1671 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1672 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1673 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
1674 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1675 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1676 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1677 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1678 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1679 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1680 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
1681 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
1682 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1683 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1684 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1685 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
1686 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1687 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1688 ; NONEON-NOSVE-NEXT: ret
1689 %res = fmul <2 x half> %op1, %op2
; fmul of two <4 x half> arguments passed by value.
; SVE/SME runs (CHECK prefix) expect a single predicated fmul on .h lanes
; under a vl4 predicate. The NONEON-NOSVE run expects the operands to be
; stored to the stack and each of the four lanes to be widened with fcvt,
; multiplied with scalar fmul in single precision, and narrowed back.
1693 define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) {
1694 ; CHECK-LABEL: fmul_v4f16:
1696 ; CHECK-NEXT: ptrue p0.h, vl4
1697 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1698 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
1699 ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
1700 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1703 ; NONEON-NOSVE-LABEL: fmul_v4f16:
1704 ; NONEON-NOSVE: // %bb.0:
1705 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
1706 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1707 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
1708 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
1709 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1710 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1711 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1712 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1713 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1714 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1715 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1716 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
1717 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
1718 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1719 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1720 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1721 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1722 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1723 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
1724 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1725 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1726 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1727 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1728 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1729 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1730 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
1731 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
1732 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1733 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1734 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1735 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
1736 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1737 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1738 ; NONEON-NOSVE-NEXT: ret
1739 %res = fmul <4 x half> %op1, %op2
; Test fmul on <8 x half> operands passed in registers.
; The SVE/SME runs expect a single predicated fmul on .h lanes under a vl8
; ptrue. The NONEON-NOSVE run expects q0/q1 pushed with a pre-indexed stp
; (48-byte frame), then eight lanes each widened with fcvt, multiplied in
; single precision, narrowed back to half and stored at [sp, #32..#46], from
; where the result is reloaded into q0.
1743 define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) {
1744 ; CHECK-LABEL: fmul_v8f16:
1746 ; CHECK-NEXT: ptrue p0.h, vl8
1747 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1748 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
1749 ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
1750 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1753 ; NONEON-NOSVE-LABEL: fmul_v8f16:
1754 ; NONEON-NOSVE: // %bb.0:
1755 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
1756 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
1757 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
1758 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1759 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1760 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1761 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1762 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1763 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1764 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1765 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
1766 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
1767 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1768 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1769 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1770 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1771 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1772 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
1773 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
1774 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1775 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1776 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1777 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1778 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1779 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
1780 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
1781 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1782 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1783 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
1784 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1785 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1786 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
1787 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
1788 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1789 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1790 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
1791 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1792 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1793 ; NONEON-NOSVE-NEXT: str h0, [sp, #38]
1794 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
1795 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1796 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1797 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
1798 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1799 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1800 ; NONEON-NOSVE-NEXT: str h0, [sp, #36]
1801 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1802 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1803 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1804 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
1805 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1806 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1807 ; NONEON-NOSVE-NEXT: str h0, [sp, #34]
1808 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
1809 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1810 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1811 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1812 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
1813 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
1814 ; NONEON-NOSVE-NEXT: add sp, sp, #48
1815 ; NONEON-NOSVE-NEXT: ret
1816 %res = fmul <8 x half> %op1, %op2
; Test fmul on <16 x half> values loaded from %a and %b, with the product
; stored back through %a.
; The SVE/SME runs expect the 256-bit vectors handled as two 128-bit halves,
; each loaded with ldp and multiplied by a predicated fmul under a vl8 ptrue
; (the second one preceded by movprfx), then written back with one stp.
; The NONEON-NOSVE run expects all four q registers copied into a 96-byte
; stack frame and sixteen lanes processed one at a time (fcvt to single,
; fmul, fcvt back to half); the results at [sp, #64] are then reloaded with
; ldp and stored to [x0].
1820 define void @fmul_v16f16(ptr %a, ptr %b) {
1821 ; CHECK-LABEL: fmul_v16f16:
1823 ; CHECK-NEXT: ldp q0, q3, [x1]
1824 ; CHECK-NEXT: ptrue p0.h, vl8
1825 ; CHECK-NEXT: ldp q1, q2, [x0]
1826 ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
1827 ; CHECK-NEXT: movprfx z1, z2
1828 ; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h
1829 ; CHECK-NEXT: stp q0, q1, [x0]
1832 ; NONEON-NOSVE-LABEL: fmul_v16f16:
1833 ; NONEON-NOSVE: // %bb.0:
1834 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
1835 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
1836 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
1837 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
1838 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
1839 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
1840 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
1841 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #46]
1842 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1843 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1844 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1845 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #44]
1846 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1847 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1848 ; NONEON-NOSVE-NEXT: str h0, [sp, #94]
1849 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
1850 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1851 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1852 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #42]
1853 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1854 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1855 ; NONEON-NOSVE-NEXT: str h0, [sp, #92]
1856 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
1857 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1858 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1859 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #40]
1860 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1861 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1862 ; NONEON-NOSVE-NEXT: str h0, [sp, #90]
1863 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
1864 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1865 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1866 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #38]
1867 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1868 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1869 ; NONEON-NOSVE-NEXT: str h0, [sp, #88]
1870 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
1871 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1872 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1873 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #36]
1874 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1875 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1876 ; NONEON-NOSVE-NEXT: str h0, [sp, #86]
1877 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
1878 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1879 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1880 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #34]
1881 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1882 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1883 ; NONEON-NOSVE-NEXT: str h0, [sp, #84]
1884 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
1885 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1886 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1887 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
1888 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1889 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1890 ; NONEON-NOSVE-NEXT: str h0, [sp, #82]
1891 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
1892 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1893 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1894 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1895 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1896 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1897 ; NONEON-NOSVE-NEXT: str h0, [sp, #80]
1898 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
1899 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1900 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1901 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1902 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1903 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1904 ; NONEON-NOSVE-NEXT: str h0, [sp, #78]
1905 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
1906 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1907 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1908 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1909 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1910 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1911 ; NONEON-NOSVE-NEXT: str h0, [sp, #76]
1912 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
1913 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1914 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1915 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1916 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1917 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1918 ; NONEON-NOSVE-NEXT: str h0, [sp, #74]
1919 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
1920 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1921 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1922 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
1923 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1924 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1925 ; NONEON-NOSVE-NEXT: str h0, [sp, #72]
1926 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
1927 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1928 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1929 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
1930 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1931 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1932 ; NONEON-NOSVE-NEXT: str h0, [sp, #70]
1933 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
1934 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1935 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1936 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
1937 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1938 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1939 ; NONEON-NOSVE-NEXT: str h0, [sp, #68]
1940 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1941 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1942 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1943 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
1944 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1945 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1946 ; NONEON-NOSVE-NEXT: str h0, [sp, #66]
1947 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
1948 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1949 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1950 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1951 ; NONEON-NOSVE-NEXT: str h0, [sp, #64]
1952 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
1953 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
1954 ; NONEON-NOSVE-NEXT: add sp, sp, #96
1955 ; NONEON-NOSVE-NEXT: ret
1956 %op1 = load <16 x half>, ptr %a
1957 %op2 = load <16 x half>, ptr %b
1958 %res = fmul <16 x half> %op1, %op2
1959 store <16 x half> %res, ptr %a
; Test fmul on <2 x float> operands passed in registers.
; The SVE/SME runs expect a single predicated fmul on .s lanes under a vl2
; ptrue. The NONEON-NOSVE run expects the operands spilled to the stack and
; two scalar single-precision fmul instructions (no fcvt widening), with the
; pair of results stored by stp and reloaded into d0.
1963 define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) {
1964 ; CHECK-LABEL: fmul_v2f32:
1966 ; CHECK-NEXT: ptrue p0.s, vl2
1967 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1968 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
1969 ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
1970 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1973 ; NONEON-NOSVE-LABEL: fmul_v2f32:
1974 ; NONEON-NOSVE: // %bb.0:
1975 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
1976 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1977 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
1978 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
1979 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
1980 ; NONEON-NOSVE-NEXT: fmul s3, s2, s0
1981 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
1982 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
1983 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
1984 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
1985 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1986 ; NONEON-NOSVE-NEXT: ret
1987 %res = fmul <2 x float> %op1, %op2
1988 ret <2 x float> %res
; Test fmul on <4 x float> operands passed in registers.
; The SVE/SME runs expect a single predicated fmul on .s lanes under a vl4
; ptrue. The NONEON-NOSVE run expects q0/q1 pushed with a pre-indexed stp and
; four scalar single-precision fmul instructions, with results stored in pairs
; by stp and the full vector reloaded into q0 from [sp, #32].
1991 define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) {
1992 ; CHECK-LABEL: fmul_v4f32:
1994 ; CHECK-NEXT: ptrue p0.s, vl4
1995 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1996 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
1997 ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
1998 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2001 ; NONEON-NOSVE-LABEL: fmul_v4f32:
2002 ; NONEON-NOSVE: // %bb.0:
2003 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
2004 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
2005 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
2006 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
2007 ; NONEON-NOSVE-NEXT: fmul s3, s2, s0
2008 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
2009 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
2010 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
2011 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40]
2012 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
2013 ; NONEON-NOSVE-NEXT: fmul s3, s2, s0
2014 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
2015 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
2016 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32]
2017 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
2018 ; NONEON-NOSVE-NEXT: add sp, sp, #48
2019 ; NONEON-NOSVE-NEXT: ret
2020 %res = fmul <4 x float> %op1, %op2
2021 ret <4 x float> %res
; Test fmul on <8 x float> values loaded from %a and %b, with the product
; stored back through %a.
; The SVE/SME runs expect two 128-bit halves, each multiplied by a predicated
; fmul on .s lanes under a vl4 ptrue (the second preceded by movprfx), and one
; stp back to [x0]. The NONEON-NOSVE run expects the inputs copied into a
; 96-byte stack frame and eight scalar single-precision fmul instructions,
; with the results at [sp, #64] reloaded by ldp and stored to [x0].
2024 define void @fmul_v8f32(ptr %a, ptr %b) {
2025 ; CHECK-LABEL: fmul_v8f32:
2027 ; CHECK-NEXT: ldp q0, q3, [x1]
2028 ; CHECK-NEXT: ptrue p0.s, vl4
2029 ; CHECK-NEXT: ldp q1, q2, [x0]
2030 ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
2031 ; CHECK-NEXT: movprfx z1, z2
2032 ; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s
2033 ; CHECK-NEXT: stp q0, q1, [x0]
2036 ; NONEON-NOSVE-LABEL: fmul_v8f32:
2037 ; NONEON-NOSVE: // %bb.0:
2038 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
2039 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
2040 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
2041 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
2042 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
2043 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
2044 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40]
2045 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #60]
2046 ; NONEON-NOSVE-NEXT: fmul s3, s2, s0
2047 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #56]
2048 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
2049 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32]
2050 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88]
2051 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #52]
2052 ; NONEON-NOSVE-NEXT: fmul s3, s2, s0
2053 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
2054 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
2055 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
2056 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80]
2057 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
2058 ; NONEON-NOSVE-NEXT: fmul s3, s2, s0
2059 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
2060 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
2061 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
2062 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72]
2063 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
2064 ; NONEON-NOSVE-NEXT: fmul s3, s2, s0
2065 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
2066 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
2067 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64]
2068 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
2069 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
2070 ; NONEON-NOSVE-NEXT: add sp, sp, #96
2071 ; NONEON-NOSVE-NEXT: ret
2072 %op1 = load <8 x float>, ptr %a
2073 %op2 = load <8 x float>, ptr %b
2074 %res = fmul <8 x float> %op1, %op2
2075 store <8 x float> %res, ptr %a
; Test fmul on <2 x double> operands passed in registers.
; The SVE/SME runs expect a single predicated fmul on .d lanes under a vl2
; ptrue. The NONEON-NOSVE run expects q0/q1 pushed with a pre-indexed stp and
; two scalar double-precision fmul instructions, with the result pair stored
; by stp and reloaded into q0 from [sp, #32].
2079 define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) {
2080 ; CHECK-LABEL: fmul_v2f64:
2082 ; CHECK-NEXT: ptrue p0.d, vl2
2083 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
2084 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
2085 ; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
2086 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2089 ; NONEON-NOSVE-LABEL: fmul_v2f64:
2090 ; NONEON-NOSVE: // %bb.0:
2091 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
2092 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
2093 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
2094 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
2095 ; NONEON-NOSVE-NEXT: fmul d3, d2, d0
2096 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
2097 ; NONEON-NOSVE-NEXT: fmul d0, d1, d0
2098 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32]
2099 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
2100 ; NONEON-NOSVE-NEXT: add sp, sp, #48
2101 ; NONEON-NOSVE-NEXT: ret
2102 %res = fmul <2 x double> %op1, %op2
2103 ret <2 x double> %res
; Test fmul on <4 x double> values loaded from %a and %b, with the product
; stored back through %a.
; The SVE/SME runs expect two 128-bit halves, each multiplied by a predicated
; fmul on .d lanes under a vl2 ptrue (the second preceded by movprfx), and one
; stp back to [x0]. The NONEON-NOSVE run expects the inputs copied into a
; 96-byte stack frame and four scalar double-precision fmul instructions,
; with the results at [sp, #64] reloaded by ldp and stored to [x0].
2106 define void @fmul_v4f64(ptr %a, ptr %b) {
2107 ; CHECK-LABEL: fmul_v4f64:
2109 ; CHECK-NEXT: ldp q0, q3, [x1]
2110 ; CHECK-NEXT: ptrue p0.d, vl2
2111 ; CHECK-NEXT: ldp q1, q2, [x0]
2112 ; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
2113 ; CHECK-NEXT: movprfx z1, z2
2114 ; CHECK-NEXT: fmul z1.d, p0/m, z1.d, z3.d
2115 ; CHECK-NEXT: stp q0, q1, [x0]
2118 ; NONEON-NOSVE-LABEL: fmul_v4f64:
2119 ; NONEON-NOSVE: // %bb.0:
2120 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
2121 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
2122 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
2123 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
2124 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
2125 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
2126 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32]
2127 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
2128 ; NONEON-NOSVE-NEXT: fmul d3, d2, d0
2129 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
2130 ; NONEON-NOSVE-NEXT: fmul d0, d1, d0
2131 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
2132 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80]
2133 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
2134 ; NONEON-NOSVE-NEXT: fmul d3, d2, d0
2135 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
2136 ; NONEON-NOSVE-NEXT: fmul d0, d1, d0
2137 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64]
2138 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
2139 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
2140 ; NONEON-NOSVE-NEXT: add sp, sp, #96
2141 ; NONEON-NOSVE-NEXT: ret
2142 %op1 = load <4 x double>, ptr %a
2143 %op2 = load <4 x double>, ptr %b
2144 %res = fmul <4 x double> %op1, %op2
2145 store <4 x double> %res, ptr %a
; Test fneg on a <2 x half> operand passed in a register.
; The SVE/SME runs expect a single predicated fneg on .h lanes. Note that the
; expected output covers four half lanes even though the IR type has two
; elements (vl4 ptrue here, four load/store sequences in the other run).
; The NONEON-NOSVE run expects d0 pushed to the stack and each lane negated in
; a general-purpose register by flipping bit 15 with eor #0x8000 (moved in and
; out with fmov), then stored and the result reloaded into d0.
2153 define <2 x half> @fneg_v2f16(<2 x half> %op) {
2154 ; CHECK-LABEL: fneg_v2f16:
2156 ; CHECK-NEXT: ptrue p0.h, vl4
2157 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
2158 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
2159 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2162 ; NONEON-NOSVE-LABEL: fneg_v2f16:
2163 ; NONEON-NOSVE: // %bb.0:
2164 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
2165 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
2166 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
2167 ; NONEON-NOSVE-NEXT: fmov w8, s0
2168 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2169 ; NONEON-NOSVE-NEXT: fmov s0, w8
2170 ; NONEON-NOSVE-NEXT: str h0, [sp, #14]
2171 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
2172 ; NONEON-NOSVE-NEXT: fmov w8, s0
2173 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2174 ; NONEON-NOSVE-NEXT: fmov s0, w8
2175 ; NONEON-NOSVE-NEXT: str h0, [sp, #12]
2176 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
2177 ; NONEON-NOSVE-NEXT: fmov w8, s0
2178 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2179 ; NONEON-NOSVE-NEXT: fmov s0, w8
2180 ; NONEON-NOSVE-NEXT: str h0, [sp, #10]
2181 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
2182 ; NONEON-NOSVE-NEXT: fmov w8, s0
2183 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2184 ; NONEON-NOSVE-NEXT: fmov s0, w8
2185 ; NONEON-NOSVE-NEXT: str h0, [sp, #8]
2186 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2187 ; NONEON-NOSVE-NEXT: add sp, sp, #16
2188 ; NONEON-NOSVE-NEXT: ret
2189 %res = fneg <2 x half> %op
; Test fneg on a <4 x half> operand passed in a register.
; The SVE/SME runs expect a single predicated fneg on .h lanes under a vl4
; ptrue. The NONEON-NOSVE run expects d0 pushed to the stack and each of the
; four lanes negated in a general-purpose register by flipping bit 15 with
; eor #0x8000 (moved in and out with fmov), then stored and the result
; reloaded into d0.
2193 define <4 x half> @fneg_v4f16(<4 x half> %op) {
2194 ; CHECK-LABEL: fneg_v4f16:
2196 ; CHECK-NEXT: ptrue p0.h, vl4
2197 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
2198 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
2199 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2202 ; NONEON-NOSVE-LABEL: fneg_v4f16:
2203 ; NONEON-NOSVE: // %bb.0:
2204 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
2205 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
2206 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
2207 ; NONEON-NOSVE-NEXT: fmov w8, s0
2208 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2209 ; NONEON-NOSVE-NEXT: fmov s0, w8
2210 ; NONEON-NOSVE-NEXT: str h0, [sp, #14]
2211 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
2212 ; NONEON-NOSVE-NEXT: fmov w8, s0
2213 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2214 ; NONEON-NOSVE-NEXT: fmov s0, w8
2215 ; NONEON-NOSVE-NEXT: str h0, [sp, #12]
2216 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
2217 ; NONEON-NOSVE-NEXT: fmov w8, s0
2218 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2219 ; NONEON-NOSVE-NEXT: fmov s0, w8
2220 ; NONEON-NOSVE-NEXT: str h0, [sp, #10]
2221 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
2222 ; NONEON-NOSVE-NEXT: fmov w8, s0
2223 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2224 ; NONEON-NOSVE-NEXT: fmov s0, w8
2225 ; NONEON-NOSVE-NEXT: str h0, [sp, #8]
2226 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2227 ; NONEON-NOSVE-NEXT: add sp, sp, #16
2228 ; NONEON-NOSVE-NEXT: ret
2229 %res = fneg <4 x half> %op
; Test fneg on an <8 x half> operand passed in a register.
; The SVE/SME runs expect a single predicated fneg on .h lanes under a vl8
; ptrue. The NONEON-NOSVE run expects q0 pushed to a 32-byte frame and each of
; the eight lanes negated in a general-purpose register by flipping bit 15
; with eor #0x8000, stored at [sp, #16..#30], and the result reloaded into q0.
2233 define <8 x half> @fneg_v8f16(<8 x half> %op) {
2234 ; CHECK-LABEL: fneg_v8f16:
2236 ; CHECK-NEXT: ptrue p0.h, vl8
2237 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
2238 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
2239 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2242 ; NONEON-NOSVE-LABEL: fneg_v8f16:
2243 ; NONEON-NOSVE: // %bb.0:
2244 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
2245 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
2246 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
2247 ; NONEON-NOSVE-NEXT: fmov w8, s0
2248 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2249 ; NONEON-NOSVE-NEXT: fmov s0, w8
2250 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
2251 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
2252 ; NONEON-NOSVE-NEXT: fmov w8, s0
2253 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2254 ; NONEON-NOSVE-NEXT: fmov s0, w8
2255 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
2256 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
2257 ; NONEON-NOSVE-NEXT: fmov w8, s0
2258 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2259 ; NONEON-NOSVE-NEXT: fmov s0, w8
2260 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
2261 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
2262 ; NONEON-NOSVE-NEXT: fmov w8, s0
2263 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2264 ; NONEON-NOSVE-NEXT: fmov s0, w8
2265 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
2266 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
2267 ; NONEON-NOSVE-NEXT: fmov w8, s0
2268 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2269 ; NONEON-NOSVE-NEXT: fmov s0, w8
2270 ; NONEON-NOSVE-NEXT: str h0, [sp, #22]
2271 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
2272 ; NONEON-NOSVE-NEXT: fmov w8, s0
2273 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2274 ; NONEON-NOSVE-NEXT: fmov s0, w8
2275 ; NONEON-NOSVE-NEXT: str h0, [sp, #20]
2276 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
2277 ; NONEON-NOSVE-NEXT: fmov w8, s0
2278 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2279 ; NONEON-NOSVE-NEXT: fmov s0, w8
2280 ; NONEON-NOSVE-NEXT: str h0, [sp, #18]
2281 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
2282 ; NONEON-NOSVE-NEXT: fmov w8, s0
2283 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2284 ; NONEON-NOSVE-NEXT: fmov s0, w8
2285 ; NONEON-NOSVE-NEXT: str h0, [sp, #16]
2286 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
2287 ; NONEON-NOSVE-NEXT: add sp, sp, #32
2288 ; NONEON-NOSVE-NEXT: ret
2289 %res = fneg <8 x half> %op
; Test fneg on a <16 x half> value loaded from %a, with the result stored back
; through %a.
; The SVE/SME runs expect the two 128-bit halves loaded with ldp, each negated
; by a predicated fneg on .h lanes under a vl8 ptrue, and stored with one stp.
; The NONEON-NOSVE run expects the input pushed to a 64-byte frame and each of
; the sixteen lanes negated by flipping bit 15 with eor #0x8000, with the
; results at [sp, #32] reloaded by ldp and stored to [x0].
; NOTE(review): %b is not referenced by the body visible here, and the sibling
; tests fneg_v8f32 and fneg_v4f64 take only %a - confirm whether the extra
; parameter is intentional before regenerating the assertions.
2293 define void @fneg_v16f16(ptr %a, ptr %b) {
2294 ; CHECK-LABEL: fneg_v16f16:
2296 ; CHECK-NEXT: ldp q0, q1, [x0]
2297 ; CHECK-NEXT: ptrue p0.h, vl8
2298 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
2299 ; CHECK-NEXT: fneg z1.h, p0/m, z1.h
2300 ; CHECK-NEXT: stp q0, q1, [x0]
2303 ; NONEON-NOSVE-LABEL: fneg_v16f16:
2304 ; NONEON-NOSVE: // %bb.0:
2305 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
2306 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
2307 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
2308 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
2309 ; NONEON-NOSVE-NEXT: fmov w8, s0
2310 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2311 ; NONEON-NOSVE-NEXT: fmov s0, w8
2312 ; NONEON-NOSVE-NEXT: str h0, [sp, #62]
2313 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
2314 ; NONEON-NOSVE-NEXT: fmov w8, s0
2315 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2316 ; NONEON-NOSVE-NEXT: fmov s0, w8
2317 ; NONEON-NOSVE-NEXT: str h0, [sp, #60]
2318 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
2319 ; NONEON-NOSVE-NEXT: fmov w8, s0
2320 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2321 ; NONEON-NOSVE-NEXT: fmov s0, w8
2322 ; NONEON-NOSVE-NEXT: str h0, [sp, #58]
2323 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
2324 ; NONEON-NOSVE-NEXT: fmov w8, s0
2325 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2326 ; NONEON-NOSVE-NEXT: fmov s0, w8
2327 ; NONEON-NOSVE-NEXT: str h0, [sp, #56]
2328 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
2329 ; NONEON-NOSVE-NEXT: fmov w8, s0
2330 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2331 ; NONEON-NOSVE-NEXT: fmov s0, w8
2332 ; NONEON-NOSVE-NEXT: str h0, [sp, #54]
2333 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
2334 ; NONEON-NOSVE-NEXT: fmov w8, s0
2335 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2336 ; NONEON-NOSVE-NEXT: fmov s0, w8
2337 ; NONEON-NOSVE-NEXT: str h0, [sp, #52]
2338 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
2339 ; NONEON-NOSVE-NEXT: fmov w8, s0
2340 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2341 ; NONEON-NOSVE-NEXT: fmov s0, w8
2342 ; NONEON-NOSVE-NEXT: str h0, [sp, #50]
2343 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
2344 ; NONEON-NOSVE-NEXT: fmov w8, s0
2345 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2346 ; NONEON-NOSVE-NEXT: fmov s0, w8
2347 ; NONEON-NOSVE-NEXT: str h0, [sp, #48]
2348 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
2349 ; NONEON-NOSVE-NEXT: fmov w8, s0
2350 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2351 ; NONEON-NOSVE-NEXT: fmov s0, w8
2352 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
2353 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
2354 ; NONEON-NOSVE-NEXT: fmov w8, s0
2355 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2356 ; NONEON-NOSVE-NEXT: fmov s0, w8
2357 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
2358 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
2359 ; NONEON-NOSVE-NEXT: fmov w8, s0
2360 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2361 ; NONEON-NOSVE-NEXT: fmov s0, w8
2362 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
2363 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
2364 ; NONEON-NOSVE-NEXT: fmov w8, s0
2365 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2366 ; NONEON-NOSVE-NEXT: fmov s0, w8
2367 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
2368 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
2369 ; NONEON-NOSVE-NEXT: fmov w8, s0
2370 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2371 ; NONEON-NOSVE-NEXT: fmov s0, w8
2372 ; NONEON-NOSVE-NEXT: str h0, [sp, #38]
2373 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
2374 ; NONEON-NOSVE-NEXT: fmov w8, s0
2375 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2376 ; NONEON-NOSVE-NEXT: fmov s0, w8
2377 ; NONEON-NOSVE-NEXT: str h0, [sp, #36]
2378 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
2379 ; NONEON-NOSVE-NEXT: fmov w8, s0
2380 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2381 ; NONEON-NOSVE-NEXT: fmov s0, w8
2382 ; NONEON-NOSVE-NEXT: str h0, [sp, #34]
2383 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
2384 ; NONEON-NOSVE-NEXT: fmov w8, s0
2385 ; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
2386 ; NONEON-NOSVE-NEXT: fmov s0, w8
2387 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
2388 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
2389 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
2390 ; NONEON-NOSVE-NEXT: add sp, sp, #64
2391 ; NONEON-NOSVE-NEXT: ret
2392 %op = load <16 x half>, ptr %a
2393 %res = fneg <16 x half> %op
2394 store <16 x half> %res, ptr %a
; Test fneg on a <2 x float> operand passed in a register.
; The SVE/SME runs expect a single predicated fneg on .s lanes under a vl2
; ptrue. The NONEON-NOSVE run expects d0 pushed to the stack and two scalar
; fneg instructions on s registers, with the result pair stored by stp and
; reloaded into d0.
2398 define <2 x float> @fneg_v2f32(<2 x float> %op) {
2399 ; CHECK-LABEL: fneg_v2f32:
2401 ; CHECK-NEXT: ptrue p0.s, vl2
2402 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
2403 ; CHECK-NEXT: fneg z0.s, p0/m, z0.s
2404 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2407 ; NONEON-NOSVE-LABEL: fneg_v2f32:
2408 ; NONEON-NOSVE: // %bb.0:
2409 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
2410 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
2411 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
2412 ; NONEON-NOSVE-NEXT: fneg s1, s0
2413 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
2414 ; NONEON-NOSVE-NEXT: fneg s0, s0
2415 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8]
2416 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2417 ; NONEON-NOSVE-NEXT: add sp, sp, #16
2418 ; NONEON-NOSVE-NEXT: ret
2419 %res = fneg <2 x float> %op
2420 ret <2 x float> %res
; Test fneg on a <4 x float> operand passed in a register.
; The SVE/SME runs expect a single predicated fneg on .s lanes under a vl4
; ptrue. The NONEON-NOSVE run expects q0 pushed to a 32-byte frame and four
; scalar fneg instructions on s registers, with results stored in pairs by stp
; and the full vector reloaded into q0 from [sp, #16].
2423 define <4 x float> @fneg_v4f32(<4 x float> %op) {
2424 ; CHECK-LABEL: fneg_v4f32:
2426 ; CHECK-NEXT: ptrue p0.s, vl4
2427 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
2428 ; CHECK-NEXT: fneg z0.s, p0/m, z0.s
2429 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2432 ; NONEON-NOSVE-LABEL: fneg_v4f32:
2433 ; NONEON-NOSVE: // %bb.0:
2434 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
2435 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
2436 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
2437 ; NONEON-NOSVE-NEXT: fneg s1, s0
2438 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
2439 ; NONEON-NOSVE-NEXT: fneg s0, s0
2440 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24]
2441 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
2442 ; NONEON-NOSVE-NEXT: fneg s1, s0
2443 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
2444 ; NONEON-NOSVE-NEXT: fneg s0, s0
2445 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16]
2446 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
2447 ; NONEON-NOSVE-NEXT: add sp, sp, #32
2448 ; NONEON-NOSVE-NEXT: ret
2449 %res = fneg <4 x float> %op
2450 ret <4 x float> %res
; Test fneg on an <8 x float> value loaded from %a, with the result stored
; back through %a.
; The SVE/SME runs expect the two 128-bit halves loaded with ldp, each negated
; by a predicated fneg on .s lanes under a vl4 ptrue, and stored with one stp.
; The NONEON-NOSVE run expects the input pushed to a 64-byte frame and eight
; scalar fneg instructions on s registers, with the results at [sp, #32]
; reloaded by ldp and stored to [x0].
2453 define void @fneg_v8f32(ptr %a) {
2454 ; CHECK-LABEL: fneg_v8f32:
2456 ; CHECK-NEXT: ldp q0, q1, [x0]
2457 ; CHECK-NEXT: ptrue p0.s, vl4
2458 ; CHECK-NEXT: fneg z0.s, p0/m, z0.s
2459 ; CHECK-NEXT: fneg z1.s, p0/m, z1.s
2460 ; CHECK-NEXT: stp q0, q1, [x0]
2463 ; NONEON-NOSVE-LABEL: fneg_v8f32:
2464 ; NONEON-NOSVE: // %bb.0:
2465 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
2466 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
2467 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
2468 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
2469 ; NONEON-NOSVE-NEXT: fneg s1, s0
2470 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
2471 ; NONEON-NOSVE-NEXT: fneg s0, s0
2472 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56]
2473 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
2474 ; NONEON-NOSVE-NEXT: fneg s1, s0
2475 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
2476 ; NONEON-NOSVE-NEXT: fneg s0, s0
2477 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48]
2478 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
2479 ; NONEON-NOSVE-NEXT: fneg s1, s0
2480 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
2481 ; NONEON-NOSVE-NEXT: fneg s0, s0
2482 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40]
2483 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
2484 ; NONEON-NOSVE-NEXT: fneg s1, s0
2485 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
2486 ; NONEON-NOSVE-NEXT: fneg s0, s0
2487 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32]
2488 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
2489 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
2490 ; NONEON-NOSVE-NEXT: add sp, sp, #64
2491 ; NONEON-NOSVE-NEXT: ret
2492 %op = load <8 x float>, ptr %a
2493 %res = fneg <8 x float> %op
2494 store <8 x float> %res, ptr %a
; Test fneg on a <2 x double> operand passed in a register.
; The SVE/SME runs expect a single predicated fneg on .d lanes under a vl2
; ptrue. The NONEON-NOSVE run expects q0 pushed to a 32-byte frame and two
; scalar fneg instructions on d registers, with the result pair stored by stp
; and reloaded into q0 from [sp, #16].
2498 define <2 x double> @fneg_v2f64(<2 x double> %op) {
2499 ; CHECK-LABEL: fneg_v2f64:
2501 ; CHECK-NEXT: ptrue p0.d, vl2
2502 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
2503 ; CHECK-NEXT: fneg z0.d, p0/m, z0.d
2504 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2507 ; NONEON-NOSVE-LABEL: fneg_v2f64:
2508 ; NONEON-NOSVE: // %bb.0:
2509 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
2510 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
2511 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2512 ; NONEON-NOSVE-NEXT: fneg d1, d0
2513 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
2514 ; NONEON-NOSVE-NEXT: fneg d0, d0
2515 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
2516 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
2517 ; NONEON-NOSVE-NEXT: add sp, sp, #32
2518 ; NONEON-NOSVE-NEXT: ret
2519 %res = fneg <2 x double> %op
2520 ret <2 x double> %res
; Test fneg on a <4 x double> value loaded from %a, with the result stored
; back through %a.
; The SVE/SME runs expect the two 128-bit halves loaded with ldp, each negated
; by a predicated fneg on .d lanes under a vl2 ptrue, and stored with one stp.
; The NONEON-NOSVE run expects the input pushed to a 64-byte frame and four
; scalar fneg instructions on d registers, with the results at [sp, #32]
; reloaded by ldp and stored to [x0].
2523 define void @fneg_v4f64(ptr %a) {
2524 ; CHECK-LABEL: fneg_v4f64:
2526 ; CHECK-NEXT: ldp q0, q1, [x0]
2527 ; CHECK-NEXT: ptrue p0.d, vl2
2528 ; CHECK-NEXT: fneg z0.d, p0/m, z0.d
2529 ; CHECK-NEXT: fneg z1.d, p0/m, z1.d
2530 ; CHECK-NEXT: stp q0, q1, [x0]
2533 ; NONEON-NOSVE-LABEL: fneg_v4f64:
2534 ; NONEON-NOSVE: // %bb.0:
2535 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
2536 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
2537 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
2538 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
2539 ; NONEON-NOSVE-NEXT: fneg d1, d0
2540 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
2541 ; NONEON-NOSVE-NEXT: fneg d0, d0
2542 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
2543 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2544 ; NONEON-NOSVE-NEXT: fneg d1, d0
2545 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
2546 ; NONEON-NOSVE-NEXT: fneg d0, d0
2547 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
2548 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
2549 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
2550 ; NONEON-NOSVE-NEXT: add sp, sp, #64
2551 ; NONEON-NOSVE-NEXT: ret
2552 %op = load <4 x double>, ptr %a
2553 %res = fneg <4 x double> %op
2554 store <4 x double> %res, ptr %a
; Test the llvm.sqrt intrinsic on a <2 x half> operand passed in a register.
; The SVE/SME runs expect a single predicated fsqrt on .h lanes. Note that the
; expected output covers four half lanes even though the IR type has two
; elements (vl4 ptrue here, four load/store sequences in the other run).
; The NONEON-NOSVE run expects d0 pushed to the stack and each lane widened
; with fcvt, square-rooted in single precision, narrowed back to half and
; stored before the result is reloaded into d0.
2562 define <2 x half> @fsqrt_v2f16(<2 x half> %op) {
2563 ; CHECK-LABEL: fsqrt_v2f16:
2565 ; CHECK-NEXT: ptrue p0.h, vl4
2566 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
2567 ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
2568 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2571 ; NONEON-NOSVE-LABEL: fsqrt_v2f16:
2572 ; NONEON-NOSVE: // %bb.0:
2573 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
2574 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
2575 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
2576 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2577 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2578 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2579 ; NONEON-NOSVE-NEXT: str h0, [sp, #14]
2580 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
2581 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2582 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2583 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2584 ; NONEON-NOSVE-NEXT: str h0, [sp, #12]
2585 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
2586 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2587 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2588 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2589 ; NONEON-NOSVE-NEXT: str h0, [sp, #10]
2590 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
2591 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2592 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2593 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2594 ; NONEON-NOSVE-NEXT: str h0, [sp, #8]
2595 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2596 ; NONEON-NOSVE-NEXT: add sp, sp, #16
2597 ; NONEON-NOSVE-NEXT: ret
2598 %res = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %op)
; fsqrt_v4f16: calls @llvm.sqrt.v4f16 on the <4 x half> argument.
; With SVE/SME the expectation is a single predicated fsqrt on .h lanes
; (ptrue vl4). Without SVE the expected sequence spills d0 to the stack and,
; for each of the four half lanes, converts to single precision, takes the
; square root, converts back to half and stores it before reloading d0.
2602 define <4 x half> @fsqrt_v4f16(<4 x half> %op) {
2603 ; CHECK-LABEL: fsqrt_v4f16:
2605 ; CHECK-NEXT: ptrue p0.h, vl4
2606 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
2607 ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
2608 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2611 ; NONEON-NOSVE-LABEL: fsqrt_v4f16:
2612 ; NONEON-NOSVE: // %bb.0:
2613 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
2614 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
2615 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
2616 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2617 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2618 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2619 ; NONEON-NOSVE-NEXT: str h0, [sp, #14]
2620 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
2621 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2622 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2623 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2624 ; NONEON-NOSVE-NEXT: str h0, [sp, #12]
2625 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
2626 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2627 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2628 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2629 ; NONEON-NOSVE-NEXT: str h0, [sp, #10]
2630 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
2631 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2632 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2633 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2634 ; NONEON-NOSVE-NEXT: str h0, [sp, #8]
2635 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2636 ; NONEON-NOSVE-NEXT: add sp, sp, #16
2637 ; NONEON-NOSVE-NEXT: ret
2638 %res = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %op)
; fsqrt_v8f16: calls @llvm.sqrt.v8f16 on the <8 x half> argument.
; With SVE/SME the expectation is a single predicated fsqrt on .h lanes
; (ptrue vl8). Without SVE the expected sequence spills q0 into a 32-byte
; stack area and handles each of the eight half lanes with a
; convert / scalar fsqrt / convert-back triple before reloading q0.
2642 define <8 x half> @fsqrt_v8f16(<8 x half> %op) {
2643 ; CHECK-LABEL: fsqrt_v8f16:
2645 ; CHECK-NEXT: ptrue p0.h, vl8
2646 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
2647 ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
2648 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2651 ; NONEON-NOSVE-LABEL: fsqrt_v8f16:
2652 ; NONEON-NOSVE: // %bb.0:
2653 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
2654 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
2655 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
2656 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2657 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2658 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2659 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
2660 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
2661 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2662 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2663 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2664 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
2665 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
2666 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2667 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2668 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2669 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
2670 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
2671 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2672 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2673 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2674 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
2675 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
2676 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2677 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2678 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2679 ; NONEON-NOSVE-NEXT: str h0, [sp, #22]
2680 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
2681 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2682 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2683 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2684 ; NONEON-NOSVE-NEXT: str h0, [sp, #20]
2685 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
2686 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2687 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2688 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2689 ; NONEON-NOSVE-NEXT: str h0, [sp, #18]
2690 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
2691 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2692 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2693 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2694 ; NONEON-NOSVE-NEXT: str h0, [sp, #16]
2695 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
2696 ; NONEON-NOSVE-NEXT: add sp, sp, #32
2697 ; NONEON-NOSVE-NEXT: ret
2698 %res = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %op)
; fsqrt_v16f16: loads a <16 x half> from %a, applies @llvm.sqrt.v16f16 and
; stores the result back through %a. The %b parameter is not referenced by
; the visible body.
; With SVE/SME the expectation is two predicated fsqrt operations on .h lanes
; (ptrue vl8) over the loaded q-register pair. Without SVE the expected
; sequence copies both halves into a 64-byte stack area and handles each of
; the sixteen half lanes with a convert / scalar fsqrt / convert-back triple.
2702 define void @fsqrt_v16f16(ptr %a, ptr %b) {
2703 ; CHECK-LABEL: fsqrt_v16f16:
2705 ; CHECK-NEXT: ldp q0, q1, [x0]
2706 ; CHECK-NEXT: ptrue p0.h, vl8
2707 ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
2708 ; CHECK-NEXT: fsqrt z1.h, p0/m, z1.h
2709 ; CHECK-NEXT: stp q0, q1, [x0]
2712 ; NONEON-NOSVE-LABEL: fsqrt_v16f16:
2713 ; NONEON-NOSVE: // %bb.0:
2714 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
2715 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
2716 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
2717 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
2718 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2719 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2720 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2721 ; NONEON-NOSVE-NEXT: str h0, [sp, #62]
2722 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
2723 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2724 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2725 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2726 ; NONEON-NOSVE-NEXT: str h0, [sp, #60]
2727 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
2728 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2729 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2730 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2731 ; NONEON-NOSVE-NEXT: str h0, [sp, #58]
2732 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
2733 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2734 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2735 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2736 ; NONEON-NOSVE-NEXT: str h0, [sp, #56]
2737 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
2738 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2739 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2740 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2741 ; NONEON-NOSVE-NEXT: str h0, [sp, #54]
2742 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
2743 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2744 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2745 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2746 ; NONEON-NOSVE-NEXT: str h0, [sp, #52]
2747 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
2748 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2749 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2750 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2751 ; NONEON-NOSVE-NEXT: str h0, [sp, #50]
2752 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
2753 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2754 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2755 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2756 ; NONEON-NOSVE-NEXT: str h0, [sp, #48]
2757 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
2758 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2759 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2760 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2761 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
2762 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
2763 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2764 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2765 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2766 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
2767 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
2768 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2769 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2770 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2771 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
2772 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
2773 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2774 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2775 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2776 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
2777 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
2778 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2779 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2780 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2781 ; NONEON-NOSVE-NEXT: str h0, [sp, #38]
2782 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
2783 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2784 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2785 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2786 ; NONEON-NOSVE-NEXT: str h0, [sp, #36]
2787 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
2788 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2789 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2790 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2791 ; NONEON-NOSVE-NEXT: str h0, [sp, #34]
2792 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
2793 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2794 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2795 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2796 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
2797 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
2798 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
2799 ; NONEON-NOSVE-NEXT: add sp, sp, #64
2800 ; NONEON-NOSVE-NEXT: ret
2801 %op = load <16 x half>, ptr %a
2802 %res = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %op)
2803 store <16 x half> %res, ptr %a
; fsqrt_v2f32: calls @llvm.sqrt.v2f32 on the <2 x float> argument and returns
; the result.
; With SVE/SME the expectation is a single predicated fsqrt on .s lanes
; (ptrue vl2). Without SVE the expected sequence spills d0 to the stack,
; performs two scalar single-precision fsqrt operations and reloads d0.
2807 define <2 x float> @fsqrt_v2f32(<2 x float> %op) {
2808 ; CHECK-LABEL: fsqrt_v2f32:
2810 ; CHECK-NEXT: ptrue p0.s, vl2
2811 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
2812 ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
2813 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2816 ; NONEON-NOSVE-LABEL: fsqrt_v2f32:
2817 ; NONEON-NOSVE: // %bb.0:
2818 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
2819 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
2820 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
2821 ; NONEON-NOSVE-NEXT: fsqrt s1, s0
2822 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
2823 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2824 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8]
2825 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2826 ; NONEON-NOSVE-NEXT: add sp, sp, #16
2827 ; NONEON-NOSVE-NEXT: ret
2828 %res = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %op)
2829 ret <2 x float> %res
; fsqrt_v4f32: calls @llvm.sqrt.v4f32 on the <4 x float> argument and returns
; the result.
; With SVE/SME the expectation is a single predicated fsqrt on .s lanes
; (ptrue vl4). Without SVE the expected sequence spills q0 into a 32-byte
; stack area, performs four scalar single-precision fsqrt operations and
; reloads q0.
2832 define <4 x float> @fsqrt_v4f32(<4 x float> %op) {
2833 ; CHECK-LABEL: fsqrt_v4f32:
2835 ; CHECK-NEXT: ptrue p0.s, vl4
2836 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
2837 ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
2838 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2841 ; NONEON-NOSVE-LABEL: fsqrt_v4f32:
2842 ; NONEON-NOSVE: // %bb.0:
2843 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
2844 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
2845 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
2846 ; NONEON-NOSVE-NEXT: fsqrt s1, s0
2847 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
2848 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2849 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24]
2850 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
2851 ; NONEON-NOSVE-NEXT: fsqrt s1, s0
2852 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
2853 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2854 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16]
2855 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
2856 ; NONEON-NOSVE-NEXT: add sp, sp, #32
2857 ; NONEON-NOSVE-NEXT: ret
2858 %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %op)
2859 ret <4 x float> %res
; fsqrt_v8f32: loads an <8 x float> from %a, applies @llvm.sqrt.v8f32 and
; stores the result back through %a.
; With SVE/SME the expectation is two predicated fsqrt operations on .s lanes
; (ptrue vl4) over the loaded q-register pair. Without SVE the expected
; sequence copies the input into a 64-byte stack area and performs eight
; scalar single-precision fsqrt operations.
2862 define void @fsqrt_v8f32(ptr %a) {
2863 ; CHECK-LABEL: fsqrt_v8f32:
2865 ; CHECK-NEXT: ldp q0, q1, [x0]
2866 ; CHECK-NEXT: ptrue p0.s, vl4
2867 ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
2868 ; CHECK-NEXT: fsqrt z1.s, p0/m, z1.s
2869 ; CHECK-NEXT: stp q0, q1, [x0]
2872 ; NONEON-NOSVE-LABEL: fsqrt_v8f32:
2873 ; NONEON-NOSVE: // %bb.0:
2874 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
2875 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
2876 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
2877 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
2878 ; NONEON-NOSVE-NEXT: fsqrt s1, s0
2879 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
2880 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2881 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56]
2882 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
2883 ; NONEON-NOSVE-NEXT: fsqrt s1, s0
2884 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
2885 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2886 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48]
2887 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
2888 ; NONEON-NOSVE-NEXT: fsqrt s1, s0
2889 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
2890 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2891 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40]
2892 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
2893 ; NONEON-NOSVE-NEXT: fsqrt s1, s0
2894 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
2895 ; NONEON-NOSVE-NEXT: fsqrt s0, s0
2896 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32]
2897 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
2898 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
2899 ; NONEON-NOSVE-NEXT: add sp, sp, #64
2900 ; NONEON-NOSVE-NEXT: ret
2901 %op = load <8 x float>, ptr %a
2902 %res = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %op)
2903 store <8 x float> %res, ptr %a
; fsqrt_v2f64: calls @llvm.sqrt.v2f64 on the <2 x double> argument and
; returns the result.
; With SVE/SME the expectation is a single predicated fsqrt on .d lanes
; (ptrue vl2). Without SVE the expected sequence spills q0 into a 32-byte
; stack area, performs two scalar double-precision fsqrt operations and
; reloads q0.
2907 define <2 x double> @fsqrt_v2f64(<2 x double> %op) {
2908 ; CHECK-LABEL: fsqrt_v2f64:
2910 ; CHECK-NEXT: ptrue p0.d, vl2
2911 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
2912 ; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
2913 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
2916 ; NONEON-NOSVE-LABEL: fsqrt_v2f64:
2917 ; NONEON-NOSVE: // %bb.0:
2918 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
2919 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
2920 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2921 ; NONEON-NOSVE-NEXT: fsqrt d1, d0
2922 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
2923 ; NONEON-NOSVE-NEXT: fsqrt d0, d0
2924 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
2925 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
2926 ; NONEON-NOSVE-NEXT: add sp, sp, #32
2927 ; NONEON-NOSVE-NEXT: ret
2928 %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %op)
2929 ret <2 x double> %res
; fsqrt_v4f64: loads a <4 x double> from %a, applies @llvm.sqrt.v4f64 and
; stores the result back through %a.
; With SVE/SME the expectation is two predicated fsqrt operations on .d lanes
; (ptrue vl2) over the loaded q-register pair. Without SVE the expected
; sequence copies the input into a 64-byte stack area and performs four
; scalar double-precision fsqrt operations.
2932 define void @fsqrt_v4f64(ptr %a) {
2933 ; CHECK-LABEL: fsqrt_v4f64:
2935 ; CHECK-NEXT: ldp q0, q1, [x0]
2936 ; CHECK-NEXT: ptrue p0.d, vl2
2937 ; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
2938 ; CHECK-NEXT: fsqrt z1.d, p0/m, z1.d
2939 ; CHECK-NEXT: stp q0, q1, [x0]
2942 ; NONEON-NOSVE-LABEL: fsqrt_v4f64:
2943 ; NONEON-NOSVE: // %bb.0:
2944 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
2945 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
2946 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
2947 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
2948 ; NONEON-NOSVE-NEXT: fsqrt d1, d0
2949 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
2950 ; NONEON-NOSVE-NEXT: fsqrt d0, d0
2951 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
2952 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
2953 ; NONEON-NOSVE-NEXT: fsqrt d1, d0
2954 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
2955 ; NONEON-NOSVE-NEXT: fsqrt d0, d0
2956 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
2957 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
2958 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
2959 ; NONEON-NOSVE-NEXT: add sp, sp, #64
2960 ; NONEON-NOSVE-NEXT: ret
2961 %op = load <4 x double>, ptr %a
2962 %res = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %op)
2963 store <4 x double> %res, ptr %a
; fsub_v2f16: computes %op1 - %op2 on <2 x half> operands.
; With SVE/SME the expectation is a single predicated fsub on .h lanes
; (ptrue vl4). Without SVE the expected sequence spills d0/d1 into a 32-byte
; stack area and, for each of four half lanes, converts both inputs to single
; precision, subtracts, converts the difference back to half and stores it
; before reloading the result as d0.
2971 define <2 x half> @fsub_v2f16(<2 x half> %op1, <2 x half> %op2) {
2972 ; CHECK-LABEL: fsub_v2f16:
2974 ; CHECK-NEXT: ptrue p0.h, vl4
2975 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
2976 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
2977 ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
2978 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2981 ; NONEON-NOSVE-LABEL: fsub_v2f16:
2982 ; NONEON-NOSVE: // %bb.0:
2983 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
2984 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
2985 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
2986 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
2987 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
2988 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2989 ; NONEON-NOSVE-NEXT: fcvt s1, h1
2990 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
2991 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
2992 ; NONEON-NOSVE-NEXT: fcvt s1, h1
2993 ; NONEON-NOSVE-NEXT: fcvt h0, s0
2994 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
2995 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
2996 ; NONEON-NOSVE-NEXT: fcvt s0, h0
2997 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
2998 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
2999 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3000 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3001 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
3002 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
3003 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3004 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3005 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
3006 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3007 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3008 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
3009 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
3010 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3011 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3012 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3013 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
3014 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
3015 ; NONEON-NOSVE-NEXT: add sp, sp, #32
3016 ; NONEON-NOSVE-NEXT: ret
3017 %res = fsub <2 x half> %op1, %op2
; fsub_v4f16: computes %op1 - %op2 on <4 x half> operands.
; With SVE/SME the expectation is a single predicated fsub on .h lanes
; (ptrue vl4). Without SVE the expected sequence spills d0/d1 into a 32-byte
; stack area and, for each of the four half lanes, converts both inputs to
; single precision, subtracts, converts the difference back to half and
; stores it before reloading the result as d0.
3021 define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) {
3022 ; CHECK-LABEL: fsub_v4f16:
3024 ; CHECK-NEXT: ptrue p0.h, vl4
3025 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
3026 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
3027 ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
3028 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
3031 ; NONEON-NOSVE-LABEL: fsub_v4f16:
3032 ; NONEON-NOSVE: // %bb.0:
3033 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
3034 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
3035 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
3036 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
3037 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
3038 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3039 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3040 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3041 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
3042 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3043 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3044 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
3045 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
3046 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3047 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3048 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
3049 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3050 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3051 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
3052 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
3053 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3054 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3055 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
3056 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3057 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3058 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
3059 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
3060 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3061 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3062 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3063 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
3064 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
3065 ; NONEON-NOSVE-NEXT: add sp, sp, #32
3066 ; NONEON-NOSVE-NEXT: ret
3067 %res = fsub <4 x half> %op1, %op2
; fsub_v8f16: computes %op1 - %op2 on <8 x half> operands.
; With SVE/SME the expectation is a single predicated fsub on .h lanes
; (ptrue vl8). Without SVE the expected sequence spills q0/q1 into a 48-byte
; stack area and, for each of the eight half lanes, converts both inputs to
; single precision, subtracts, converts the difference back to half and
; stores it before reloading the result as q0.
3071 define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) {
3072 ; CHECK-LABEL: fsub_v8f16:
3074 ; CHECK-NEXT: ptrue p0.h, vl8
3075 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
3076 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
3077 ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
3078 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
3081 ; NONEON-NOSVE-LABEL: fsub_v8f16:
3082 ; NONEON-NOSVE: // %bb.0:
3083 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
3084 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
3085 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
3086 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
3087 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3088 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3089 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3090 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
3091 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3092 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3093 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
3094 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
3095 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3096 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3097 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
3098 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3099 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3100 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
3101 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
3102 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3103 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3104 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
3105 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3106 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3107 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
3108 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
3109 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3110 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3111 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
3112 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3113 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3114 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
3115 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
3116 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3117 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3118 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
3119 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3120 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3121 ; NONEON-NOSVE-NEXT: str h0, [sp, #38]
3122 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
3123 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3124 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3125 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
3126 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3127 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3128 ; NONEON-NOSVE-NEXT: str h0, [sp, #36]
3129 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
3130 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3131 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3132 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
3133 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3134 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3135 ; NONEON-NOSVE-NEXT: str h0, [sp, #34]
3136 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
3137 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3138 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3139 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3140 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
3141 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
3142 ; NONEON-NOSVE-NEXT: add sp, sp, #48
3143 ; NONEON-NOSVE-NEXT: ret
3144 %res = fsub <8 x half> %op1, %op2
; fsub_v16f16: loads <16 x half> vectors from %a and %b, computes
; (%a value) - (%b value) and stores the result back through %a.
; With SVE/SME the expectation handles the two q-register halves with .h-lane
; predicated operations (ptrue vl8): an fsubr for the first half and a
; movprfx followed by fsub for the second. Without SVE the expected sequence
; copies both inputs into a 96-byte stack area and, for each of the sixteen
; half lanes, converts to single precision, subtracts and converts back.
3148 define void @fsub_v16f16(ptr %a, ptr %b) {
3149 ; CHECK-LABEL: fsub_v16f16:
3151 ; CHECK-NEXT: ldp q0, q3, [x1]
3152 ; CHECK-NEXT: ptrue p0.h, vl8
3153 ; CHECK-NEXT: ldp q1, q2, [x0]
3154 ; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, z1.h
3155 ; CHECK-NEXT: movprfx z1, z2
3156 ; CHECK-NEXT: fsub z1.h, p0/m, z1.h, z3.h
3157 ; CHECK-NEXT: stp q0, q1, [x0]
3160 ; NONEON-NOSVE-LABEL: fsub_v16f16:
3161 ; NONEON-NOSVE: // %bb.0:
3162 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
3163 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
3164 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
3165 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
3166 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
3167 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
3168 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
3169 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #46]
3170 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3171 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3172 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3173 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #44]
3174 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3175 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3176 ; NONEON-NOSVE-NEXT: str h0, [sp, #94]
3177 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
3178 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3179 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3180 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #42]
3181 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3182 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3183 ; NONEON-NOSVE-NEXT: str h0, [sp, #92]
3184 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
3185 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3186 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3187 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #40]
3188 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3189 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3190 ; NONEON-NOSVE-NEXT: str h0, [sp, #90]
3191 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
3192 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3193 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3194 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #38]
3195 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3196 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3197 ; NONEON-NOSVE-NEXT: str h0, [sp, #88]
3198 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
3199 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3200 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3201 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #36]
3202 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3203 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3204 ; NONEON-NOSVE-NEXT: str h0, [sp, #86]
3205 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
3206 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3207 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3208 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #34]
3209 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3210 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3211 ; NONEON-NOSVE-NEXT: str h0, [sp, #84]
3212 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
3213 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3214 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3215 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
3216 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3217 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3218 ; NONEON-NOSVE-NEXT: str h0, [sp, #82]
3219 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
3220 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3221 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3222 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
3223 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3224 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3225 ; NONEON-NOSVE-NEXT: str h0, [sp, #80]
3226 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
3227 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3228 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3229 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
3230 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3231 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3232 ; NONEON-NOSVE-NEXT: str h0, [sp, #78]
3233 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
3234 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3235 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3236 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
3237 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3238 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3239 ; NONEON-NOSVE-NEXT: str h0, [sp, #76]
3240 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
3241 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3242 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3243 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
3244 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3245 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3246 ; NONEON-NOSVE-NEXT: str h0, [sp, #74]
3247 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
3248 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3249 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3250 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
3251 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3252 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3253 ; NONEON-NOSVE-NEXT: str h0, [sp, #72]
3254 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
3255 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3256 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3257 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
3258 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3259 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3260 ; NONEON-NOSVE-NEXT: str h0, [sp, #70]
3261 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
3262 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3263 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3264 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
3265 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3266 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3267 ; NONEON-NOSVE-NEXT: str h0, [sp, #68]
3268 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
3269 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3270 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3271 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
3272 ; NONEON-NOSVE-NEXT: fcvt s1, h1
3273 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3274 ; NONEON-NOSVE-NEXT: str h0, [sp, #66]
3275 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
3276 ; NONEON-NOSVE-NEXT: fcvt s0, h0
3277 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3278 ; NONEON-NOSVE-NEXT: fcvt h0, s0
3279 ; NONEON-NOSVE-NEXT: str h0, [sp, #64]
3280 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
3281 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
3282 ; NONEON-NOSVE-NEXT: add sp, sp, #96
3283 ; NONEON-NOSVE-NEXT: ret
3284 %op1 = load <16 x half>, ptr %a
3285 %op2 = load <16 x half>, ptr %b
3286 %res = fsub <16 x half> %op1, %op2
3287 store <16 x half> %res, ptr %a
; fsub_v2f32: computes %op1 - %op2 on <2 x float> operands and returns the
; result.
; With SVE/SME the expectation is a single predicated fsub on .s lanes
; (ptrue vl2). Without SVE the expected sequence spills d0/d1 into a 32-byte
; stack area, performs two scalar single-precision fsub operations and
; reloads the result as d0.
3291 define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) {
3292 ; CHECK-LABEL: fsub_v2f32:
3294 ; CHECK-NEXT: ptrue p0.s, vl2
3295 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
3296 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
3297 ; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
3298 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
3301 ; NONEON-NOSVE-LABEL: fsub_v2f32:
3302 ; NONEON-NOSVE: // %bb.0:
3303 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
3304 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
3305 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
3306 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
3307 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
3308 ; NONEON-NOSVE-NEXT: fsub s3, s2, s0
3309 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
3310 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3311 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
3312 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
3313 ; NONEON-NOSVE-NEXT: add sp, sp, #32
3314 ; NONEON-NOSVE-NEXT: ret
3315 %res = fsub <2 x float> %op1, %op2
3316 ret <2 x float> %res
; fsub_v4f32: computes %op1 - %op2 on <4 x float> operands and returns the
; result.
; With SVE/SME the expectation is a single predicated fsub on .s lanes
; (ptrue vl4). Without SVE the expected sequence spills q0/q1 into a 48-byte
; stack area, performs four scalar single-precision fsub operations and
; reloads the result as q0.
3319 define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) {
3320 ; CHECK-LABEL: fsub_v4f32:
3322 ; CHECK-NEXT: ptrue p0.s, vl4
3323 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
3324 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
3325 ; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
3326 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
3329 ; NONEON-NOSVE-LABEL: fsub_v4f32:
3330 ; NONEON-NOSVE: // %bb.0:
3331 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
3332 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
3333 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
3334 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
3335 ; NONEON-NOSVE-NEXT: fsub s3, s2, s0
3336 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
3337 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3338 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
3339 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40]
3340 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
3341 ; NONEON-NOSVE-NEXT: fsub s3, s2, s0
3342 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
3343 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3344 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32]
3345 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
3346 ; NONEON-NOSVE-NEXT: add sp, sp, #48
3347 ; NONEON-NOSVE-NEXT: ret
3348 %res = fsub <4 x float> %op1, %op2
3349 ret <4 x float> %res
; fsub_v8f32: loads <8 x float> vectors from %a and %b, computes
; (%a value) - (%b value) and stores the result back through %a.
; With SVE/SME the expectation handles the two q-register halves with .s-lane
; predicated operations (ptrue vl4): an fsubr for the first half and a
; movprfx followed by fsub for the second. Without SVE the expected sequence
; copies both inputs into a 96-byte stack area and performs eight scalar
; single-precision fsub operations.
3352 define void @fsub_v8f32(ptr %a, ptr %b) {
3353 ; CHECK-LABEL: fsub_v8f32:
3355 ; CHECK-NEXT: ldp q0, q3, [x1]
3356 ; CHECK-NEXT: ptrue p0.s, vl4
3357 ; CHECK-NEXT: ldp q1, q2, [x0]
3358 ; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s
3359 ; CHECK-NEXT: movprfx z1, z2
3360 ; CHECK-NEXT: fsub z1.s, p0/m, z1.s, z3.s
3361 ; CHECK-NEXT: stp q0, q1, [x0]
3364 ; NONEON-NOSVE-LABEL: fsub_v8f32:
3365 ; NONEON-NOSVE: // %bb.0:
3366 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
3367 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
3368 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
3369 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
3370 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
3371 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
3372 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40]
3373 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #60]
3374 ; NONEON-NOSVE-NEXT: fsub s3, s2, s0
3375 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #56]
3376 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3377 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32]
3378 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88]
3379 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #52]
3380 ; NONEON-NOSVE-NEXT: fsub s3, s2, s0
3381 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
3382 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3383 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
3384 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80]
3385 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
3386 ; NONEON-NOSVE-NEXT: fsub s3, s2, s0
3387 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
3388 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3389 ; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
3390 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72]
3391 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
3392 ; NONEON-NOSVE-NEXT: fsub s3, s2, s0
3393 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
3394 ; NONEON-NOSVE-NEXT: fsub s0, s1, s0
3395 ; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64]
3396 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
3397 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
3398 ; NONEON-NOSVE-NEXT: add sp, sp, #96
3399 ; NONEON-NOSVE-NEXT: ret
3400 %op1 = load <8 x float>, ptr %a
3401 %op2 = load <8 x float>, ptr %b
3402 %res = fsub <8 x float> %op1, %op2
3403 store <8 x float> %res, ptr %a
; fsub_v2f64: computes %op1 - %op2 on <2 x double> operands and returns the
; result.
; With SVE/SME the expectation is a single predicated fsub on .d lanes
; (ptrue vl2). Without SVE the expected sequence spills q0/q1 into a 48-byte
; stack area, performs two scalar double-precision fsub operations and
; reloads the result as q0.
3407 define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) {
3408 ; CHECK-LABEL: fsub_v2f64:
3410 ; CHECK-NEXT: ptrue p0.d, vl2
3411 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
3412 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
3413 ; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
3414 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
3417 ; NONEON-NOSVE-LABEL: fsub_v2f64:
3418 ; NONEON-NOSVE: // %bb.0:
3419 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
3420 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
3421 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
3422 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
3423 ; NONEON-NOSVE-NEXT: fsub d3, d2, d0
3424 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
3425 ; NONEON-NOSVE-NEXT: fsub d0, d1, d0
3426 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32]
3427 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
3428 ; NONEON-NOSVE-NEXT: add sp, sp, #48
3429 ; NONEON-NOSVE-NEXT: ret
3430 %res = fsub <2 x double> %op1, %op2
3431 ret <2 x double> %res
; fsub_v4f64: loads <4 x double> vectors from %a and %b, computes
; (%a value) - (%b value) and stores the result back through %a.
; With SVE/SME the expectation handles the two q-register halves with .d-lane
; predicated operations (ptrue vl2): an fsubr for the first half and a
; movprfx followed by fsub for the second. Without SVE the expected sequence
; copies both inputs into a 96-byte stack area and performs four scalar
; double-precision fsub operations.
3434 define void @fsub_v4f64(ptr %a, ptr %b) {
3435 ; CHECK-LABEL: fsub_v4f64:
3437 ; CHECK-NEXT: ldp q0, q3, [x1]
3438 ; CHECK-NEXT: ptrue p0.d, vl2
3439 ; CHECK-NEXT: ldp q1, q2, [x0]
3440 ; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d
3441 ; CHECK-NEXT: movprfx z1, z2
3442 ; CHECK-NEXT: fsub z1.d, p0/m, z1.d, z3.d
3443 ; CHECK-NEXT: stp q0, q1, [x0]
3446 ; NONEON-NOSVE-LABEL: fsub_v4f64:
3447 ; NONEON-NOSVE: // %bb.0:
3448 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
3449 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
3450 ; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
3451 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
3452 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
3453 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
3454 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32]
3455 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
3456 ; NONEON-NOSVE-NEXT: fsub d3, d2, d0
3457 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
3458 ; NONEON-NOSVE-NEXT: fsub d0, d1, d0
3459 ; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
3460 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80]
3461 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
3462 ; NONEON-NOSVE-NEXT: fsub d3, d2, d0
3463 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
3464 ; NONEON-NOSVE-NEXT: fsub d0, d1, d0
3465 ; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64]
3466 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
3467 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
3468 ; NONEON-NOSVE-NEXT: add sp, sp, #96
3469 ; NONEON-NOSVE-NEXT: ret
3470 %op1 = load <4 x double>, ptr %a
3471 %op2 = load <4 x double>, ptr %b
3472 %res = fsub <4 x double> %op1, %op2
3473 store <4 x double> %res, ptr %a
; fabs_v2f16: applies the llvm.fabs.v2f16 intrinsic to a <2 x half> argument.
; Expectations under the CHECK prefix: one predicated SVE fabs on .h lanes; the
; governing predicate is vl4, so four halfword lanes are covered although the
; IR type has two elements.
; Expectations under the NONEON-NOSVE prefix: the d register is spilled, then
; four halfword slots (offsets 6, 4, 2, 0) are each moved to w8 and masked with
; #0x7fff, which clears bit 15 (the half-precision sign bit), before being
; written to the result slot and reloaded as a d register.
3481 define <2 x half> @fabs_v2f16(<2 x half> %op) {
3482 ; CHECK-LABEL: fabs_v2f16:
3484 ; CHECK-NEXT: ptrue p0.h, vl4
3485 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
3486 ; CHECK-NEXT: fabs z0.h, p0/m, z0.h
3487 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
3490 ; NONEON-NOSVE-LABEL: fabs_v2f16:
3491 ; NONEON-NOSVE: // %bb.0:
3492 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
3493 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
3494 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
3495 ; NONEON-NOSVE-NEXT: fmov w8, s0
3496 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3497 ; NONEON-NOSVE-NEXT: fmov s0, w8
3498 ; NONEON-NOSVE-NEXT: str h0, [sp, #14]
3499 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
3500 ; NONEON-NOSVE-NEXT: fmov w8, s0
3501 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3502 ; NONEON-NOSVE-NEXT: fmov s0, w8
3503 ; NONEON-NOSVE-NEXT: str h0, [sp, #12]
3504 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
3505 ; NONEON-NOSVE-NEXT: fmov w8, s0
3506 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3507 ; NONEON-NOSVE-NEXT: fmov s0, w8
3508 ; NONEON-NOSVE-NEXT: str h0, [sp, #10]
3509 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
3510 ; NONEON-NOSVE-NEXT: fmov w8, s0
3511 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3512 ; NONEON-NOSVE-NEXT: fmov s0, w8
3513 ; NONEON-NOSVE-NEXT: str h0, [sp, #8]
3514 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
3515 ; NONEON-NOSVE-NEXT: add sp, sp, #16
3516 ; NONEON-NOSVE-NEXT: ret
3517 %res = call <2 x half> @llvm.fabs.v2f16(<2 x half> %op)
; fabs_v4f16: applies the llvm.fabs.v4f16 intrinsic to a <4 x half> argument.
; Expectations under the CHECK prefix: one predicated SVE fabs on .h lanes
; governed by a vl4 predicate.
; Expectations under the NONEON-NOSVE prefix: the d register is spilled, each of
; the four halfword lanes is moved to w8 and masked with #0x7fff (clearing
; bit 15, the half-precision sign bit), and the result is reloaded as a d register.
3521 define <4 x half> @fabs_v4f16(<4 x half> %op) {
3522 ; CHECK-LABEL: fabs_v4f16:
3524 ; CHECK-NEXT: ptrue p0.h, vl4
3525 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
3526 ; CHECK-NEXT: fabs z0.h, p0/m, z0.h
3527 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
3530 ; NONEON-NOSVE-LABEL: fabs_v4f16:
3531 ; NONEON-NOSVE: // %bb.0:
3532 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
3533 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
3534 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
3535 ; NONEON-NOSVE-NEXT: fmov w8, s0
3536 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3537 ; NONEON-NOSVE-NEXT: fmov s0, w8
3538 ; NONEON-NOSVE-NEXT: str h0, [sp, #14]
3539 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
3540 ; NONEON-NOSVE-NEXT: fmov w8, s0
3541 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3542 ; NONEON-NOSVE-NEXT: fmov s0, w8
3543 ; NONEON-NOSVE-NEXT: str h0, [sp, #12]
3544 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
3545 ; NONEON-NOSVE-NEXT: fmov w8, s0
3546 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3547 ; NONEON-NOSVE-NEXT: fmov s0, w8
3548 ; NONEON-NOSVE-NEXT: str h0, [sp, #10]
3549 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
3550 ; NONEON-NOSVE-NEXT: fmov w8, s0
3551 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3552 ; NONEON-NOSVE-NEXT: fmov s0, w8
3553 ; NONEON-NOSVE-NEXT: str h0, [sp, #8]
3554 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
3555 ; NONEON-NOSVE-NEXT: add sp, sp, #16
3556 ; NONEON-NOSVE-NEXT: ret
3557 %res = call <4 x half> @llvm.fabs.v4f16(<4 x half> %op)
; fabs_v8f16: applies the llvm.fabs.v8f16 intrinsic to an <8 x half> argument.
; Expectations under the CHECK prefix: one predicated SVE fabs on .h lanes
; governed by a vl8 predicate.
; Expectations under the NONEON-NOSVE prefix: the q register is spilled, each of
; the eight halfword lanes (offsets 14 down to 0) is moved to w8 and masked
; with #0x7fff (clearing bit 15, the half-precision sign bit), and the result
; is reloaded as a q register.
3561 define <8 x half> @fabs_v8f16(<8 x half> %op) {
3562 ; CHECK-LABEL: fabs_v8f16:
3564 ; CHECK-NEXT: ptrue p0.h, vl8
3565 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
3566 ; CHECK-NEXT: fabs z0.h, p0/m, z0.h
3567 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
3570 ; NONEON-NOSVE-LABEL: fabs_v8f16:
3571 ; NONEON-NOSVE: // %bb.0:
3572 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
3573 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
3574 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
3575 ; NONEON-NOSVE-NEXT: fmov w8, s0
3576 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3577 ; NONEON-NOSVE-NEXT: fmov s0, w8
3578 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
3579 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
3580 ; NONEON-NOSVE-NEXT: fmov w8, s0
3581 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3582 ; NONEON-NOSVE-NEXT: fmov s0, w8
3583 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
3584 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
3585 ; NONEON-NOSVE-NEXT: fmov w8, s0
3586 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3587 ; NONEON-NOSVE-NEXT: fmov s0, w8
3588 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
3589 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
3590 ; NONEON-NOSVE-NEXT: fmov w8, s0
3591 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3592 ; NONEON-NOSVE-NEXT: fmov s0, w8
3593 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
3594 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
3595 ; NONEON-NOSVE-NEXT: fmov w8, s0
3596 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3597 ; NONEON-NOSVE-NEXT: fmov s0, w8
3598 ; NONEON-NOSVE-NEXT: str h0, [sp, #22]
3599 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
3600 ; NONEON-NOSVE-NEXT: fmov w8, s0
3601 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3602 ; NONEON-NOSVE-NEXT: fmov s0, w8
3603 ; NONEON-NOSVE-NEXT: str h0, [sp, #20]
3604 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
3605 ; NONEON-NOSVE-NEXT: fmov w8, s0
3606 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3607 ; NONEON-NOSVE-NEXT: fmov s0, w8
3608 ; NONEON-NOSVE-NEXT: str h0, [sp, #18]
3609 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
3610 ; NONEON-NOSVE-NEXT: fmov w8, s0
3611 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3612 ; NONEON-NOSVE-NEXT: fmov s0, w8
3613 ; NONEON-NOSVE-NEXT: str h0, [sp, #16]
3614 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
3615 ; NONEON-NOSVE-NEXT: add sp, sp, #32
3616 ; NONEON-NOSVE-NEXT: ret
3617 %res = call <8 x half> @llvm.fabs.v8f16(<8 x half> %op)
; fabs_v16f16: loads <16 x half> from %a, applies llvm.fabs.v16f16, and stores
; the result back through %a.
; Expectations under the CHECK prefix: the 256-bit value is loaded as two q
; registers, each half gets a predicated SVE fabs on .h lanes under a shared
; vl8 predicate, and the pair is stored back to [x0].
; Expectations under the NONEON-NOSVE prefix: both q registers are pushed to a
; 64-byte frame, each of the sixteen halfword lanes (offsets 30 down to 0) is
; moved to w8 and masked with #0x7fff (clearing bit 15, the half-precision sign
; bit) into offsets 62 down to 32, and the result is reloaded as two q
; registers and stored to [x0].
3621 define void @fabs_v16f16(ptr %a) {
3622 ; CHECK-LABEL: fabs_v16f16:
3624 ; CHECK-NEXT: ldp q0, q1, [x0]
3625 ; CHECK-NEXT: ptrue p0.h, vl8
3626 ; CHECK-NEXT: fabs z0.h, p0/m, z0.h
3627 ; CHECK-NEXT: fabs z1.h, p0/m, z1.h
3628 ; CHECK-NEXT: stp q0, q1, [x0]
3631 ; NONEON-NOSVE-LABEL: fabs_v16f16:
3632 ; NONEON-NOSVE: // %bb.0:
3633 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
3634 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
3635 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
3636 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
3637 ; NONEON-NOSVE-NEXT: fmov w8, s0
3638 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3639 ; NONEON-NOSVE-NEXT: fmov s0, w8
3640 ; NONEON-NOSVE-NEXT: str h0, [sp, #62]
3641 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
3642 ; NONEON-NOSVE-NEXT: fmov w8, s0
3643 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3644 ; NONEON-NOSVE-NEXT: fmov s0, w8
3645 ; NONEON-NOSVE-NEXT: str h0, [sp, #60]
3646 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
3647 ; NONEON-NOSVE-NEXT: fmov w8, s0
3648 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3649 ; NONEON-NOSVE-NEXT: fmov s0, w8
3650 ; NONEON-NOSVE-NEXT: str h0, [sp, #58]
3651 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
3652 ; NONEON-NOSVE-NEXT: fmov w8, s0
3653 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3654 ; NONEON-NOSVE-NEXT: fmov s0, w8
3655 ; NONEON-NOSVE-NEXT: str h0, [sp, #56]
3656 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
3657 ; NONEON-NOSVE-NEXT: fmov w8, s0
3658 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3659 ; NONEON-NOSVE-NEXT: fmov s0, w8
3660 ; NONEON-NOSVE-NEXT: str h0, [sp, #54]
3661 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
3662 ; NONEON-NOSVE-NEXT: fmov w8, s0
3663 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3664 ; NONEON-NOSVE-NEXT: fmov s0, w8
3665 ; NONEON-NOSVE-NEXT: str h0, [sp, #52]
3666 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
3667 ; NONEON-NOSVE-NEXT: fmov w8, s0
3668 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3669 ; NONEON-NOSVE-NEXT: fmov s0, w8
3670 ; NONEON-NOSVE-NEXT: str h0, [sp, #50]
3671 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
3672 ; NONEON-NOSVE-NEXT: fmov w8, s0
3673 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3674 ; NONEON-NOSVE-NEXT: fmov s0, w8
3675 ; NONEON-NOSVE-NEXT: str h0, [sp, #48]
3676 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
3677 ; NONEON-NOSVE-NEXT: fmov w8, s0
3678 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3679 ; NONEON-NOSVE-NEXT: fmov s0, w8
3680 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
3681 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
3682 ; NONEON-NOSVE-NEXT: fmov w8, s0
3683 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3684 ; NONEON-NOSVE-NEXT: fmov s0, w8
3685 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
3686 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
3687 ; NONEON-NOSVE-NEXT: fmov w8, s0
3688 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3689 ; NONEON-NOSVE-NEXT: fmov s0, w8
3690 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
3691 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
3692 ; NONEON-NOSVE-NEXT: fmov w8, s0
3693 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3694 ; NONEON-NOSVE-NEXT: fmov s0, w8
3695 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
3696 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
3697 ; NONEON-NOSVE-NEXT: fmov w8, s0
3698 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3699 ; NONEON-NOSVE-NEXT: fmov s0, w8
3700 ; NONEON-NOSVE-NEXT: str h0, [sp, #38]
3701 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
3702 ; NONEON-NOSVE-NEXT: fmov w8, s0
3703 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3704 ; NONEON-NOSVE-NEXT: fmov s0, w8
3705 ; NONEON-NOSVE-NEXT: str h0, [sp, #36]
3706 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
3707 ; NONEON-NOSVE-NEXT: fmov w8, s0
3708 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3709 ; NONEON-NOSVE-NEXT: fmov s0, w8
3710 ; NONEON-NOSVE-NEXT: str h0, [sp, #34]
3711 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
3712 ; NONEON-NOSVE-NEXT: fmov w8, s0
3713 ; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
3714 ; NONEON-NOSVE-NEXT: fmov s0, w8
3715 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
3716 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
3717 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
3718 ; NONEON-NOSVE-NEXT: add sp, sp, #64
3719 ; NONEON-NOSVE-NEXT: ret
3720 %op = load <16 x half>, ptr %a
3721 %res = call <16 x half> @llvm.fabs.v16f16(<16 x half> %op)
3722 store <16 x half> %res, ptr %a
; fabs_v2f32: applies the llvm.fabs.v2f32 intrinsic to a <2 x float> argument
; and returns the result.
; Expectations under the CHECK prefix: one predicated SVE fabs on .s lanes
; governed by a vl2 predicate.
; Expectations under the NONEON-NOSVE prefix: the d register is spilled, each of
; the two lanes goes through a scalar fabs on s registers, and the pair is
; stored and reloaded as a d register.
3726 define <2 x float> @fabs_v2f32(<2 x float> %op) {
3727 ; CHECK-LABEL: fabs_v2f32:
3729 ; CHECK-NEXT: ptrue p0.s, vl2
3730 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
3731 ; CHECK-NEXT: fabs z0.s, p0/m, z0.s
3732 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
3735 ; NONEON-NOSVE-LABEL: fabs_v2f32:
3736 ; NONEON-NOSVE: // %bb.0:
3737 ; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
3738 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
3739 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
3740 ; NONEON-NOSVE-NEXT: fabs s1, s0
3741 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
3742 ; NONEON-NOSVE-NEXT: fabs s0, s0
3743 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8]
3744 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
3745 ; NONEON-NOSVE-NEXT: add sp, sp, #16
3746 ; NONEON-NOSVE-NEXT: ret
3747 %res = call <2 x float> @llvm.fabs.v2f32(<2 x float> %op)
3748 ret <2 x float> %res
; fabs_v4f32: applies the llvm.fabs.v4f32 intrinsic to a <4 x float> argument
; and returns the result.
; Expectations under the CHECK prefix: one predicated SVE fabs on .s lanes
; governed by a vl4 predicate.
; Expectations under the NONEON-NOSVE prefix: the q register is spilled, each of
; the four lanes goes through a scalar fabs on s registers (stored back in
; pairs), and the result is reloaded as a q register.
3751 define <4 x float> @fabs_v4f32(<4 x float> %op) {
3752 ; CHECK-LABEL: fabs_v4f32:
3754 ; CHECK-NEXT: ptrue p0.s, vl4
3755 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
3756 ; CHECK-NEXT: fabs z0.s, p0/m, z0.s
3757 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
3760 ; NONEON-NOSVE-LABEL: fabs_v4f32:
3761 ; NONEON-NOSVE: // %bb.0:
3762 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
3763 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
3764 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
3765 ; NONEON-NOSVE-NEXT: fabs s1, s0
3766 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
3767 ; NONEON-NOSVE-NEXT: fabs s0, s0
3768 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24]
3769 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
3770 ; NONEON-NOSVE-NEXT: fabs s1, s0
3771 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
3772 ; NONEON-NOSVE-NEXT: fabs s0, s0
3773 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16]
3774 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
3775 ; NONEON-NOSVE-NEXT: add sp, sp, #32
3776 ; NONEON-NOSVE-NEXT: ret
3777 %res = call <4 x float> @llvm.fabs.v4f32(<4 x float> %op)
3778 ret <4 x float> %res
; fabs_v8f32: loads <8 x float> from %a, applies llvm.fabs.v8f32, and stores the
; result back through %a.
; Expectations under the CHECK prefix: the 256-bit value is loaded as two q
; registers, each half gets a predicated SVE fabs on .s lanes under a shared
; vl4 predicate, and the pair is stored back to [x0].
; Expectations under the NONEON-NOSVE prefix: both q registers are pushed to a
; 64-byte frame, each of the eight lanes goes through a scalar fabs on s
; registers (stored back in pairs), and the result is reloaded as two q
; registers and stored to [x0].
3781 define void @fabs_v8f32(ptr %a) {
3782 ; CHECK-LABEL: fabs_v8f32:
3784 ; CHECK-NEXT: ldp q0, q1, [x0]
3785 ; CHECK-NEXT: ptrue p0.s, vl4
3786 ; CHECK-NEXT: fabs z0.s, p0/m, z0.s
3787 ; CHECK-NEXT: fabs z1.s, p0/m, z1.s
3788 ; CHECK-NEXT: stp q0, q1, [x0]
3791 ; NONEON-NOSVE-LABEL: fabs_v8f32:
3792 ; NONEON-NOSVE: // %bb.0:
3793 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
3794 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
3795 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
3796 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
3797 ; NONEON-NOSVE-NEXT: fabs s1, s0
3798 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
3799 ; NONEON-NOSVE-NEXT: fabs s0, s0
3800 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56]
3801 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
3802 ; NONEON-NOSVE-NEXT: fabs s1, s0
3803 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
3804 ; NONEON-NOSVE-NEXT: fabs s0, s0
3805 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48]
3806 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
3807 ; NONEON-NOSVE-NEXT: fabs s1, s0
3808 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
3809 ; NONEON-NOSVE-NEXT: fabs s0, s0
3810 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40]
3811 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
3812 ; NONEON-NOSVE-NEXT: fabs s1, s0
3813 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
3814 ; NONEON-NOSVE-NEXT: fabs s0, s0
3815 ; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32]
3816 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
3817 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
3818 ; NONEON-NOSVE-NEXT: add sp, sp, #64
3819 ; NONEON-NOSVE-NEXT: ret
3820 %op = load <8 x float>, ptr %a
3821 %res = call <8 x float> @llvm.fabs.v8f32(<8 x float> %op)
3822 store <8 x float> %res, ptr %a
; fabs_v2f64: applies the llvm.fabs.v2f64 intrinsic to a <2 x double> argument
; and returns the result.
; Expectations under the CHECK prefix: one predicated SVE fabs on .d lanes
; governed by a vl2 predicate.
; Expectations under the NONEON-NOSVE prefix: the q register is spilled, each of
; the two lanes goes through a scalar fabs on d registers, and the pair is
; stored and reloaded as a q register.
3826 define <2 x double> @fabs_v2f64(<2 x double> %op) {
3827 ; CHECK-LABEL: fabs_v2f64:
3829 ; CHECK-NEXT: ptrue p0.d, vl2
3830 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
3831 ; CHECK-NEXT: fabs z0.d, p0/m, z0.d
3832 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
3835 ; NONEON-NOSVE-LABEL: fabs_v2f64:
3836 ; NONEON-NOSVE: // %bb.0:
3837 ; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
3838 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
3839 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
3840 ; NONEON-NOSVE-NEXT: fabs d1, d0
3841 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
3842 ; NONEON-NOSVE-NEXT: fabs d0, d0
3843 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
3844 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
3845 ; NONEON-NOSVE-NEXT: add sp, sp, #32
3846 ; NONEON-NOSVE-NEXT: ret
3847 %res = call <2 x double> @llvm.fabs.v2f64(<2 x double> %op)
3848 ret <2 x double> %res
; fabs_v4f64: loads <4 x double> from %a, applies llvm.fabs.v4f64, and stores
; the result back through %a.
; Expectations under the CHECK prefix: the 256-bit value is loaded as two q
; registers, each half gets a predicated SVE fabs on .d lanes under a shared
; vl2 predicate, and the pair is stored back to [x0].
; Expectations under the NONEON-NOSVE prefix: both q registers are pushed to a
; 64-byte frame, each of the four lanes goes through a scalar fabs on d
; registers (stored back in pairs), and the result is reloaded as two q
; registers and stored to [x0].
3851 define void @fabs_v4f64(ptr %a) {
3852 ; CHECK-LABEL: fabs_v4f64:
3854 ; CHECK-NEXT: ldp q0, q1, [x0]
3855 ; CHECK-NEXT: ptrue p0.d, vl2
3856 ; CHECK-NEXT: fabs z0.d, p0/m, z0.d
3857 ; CHECK-NEXT: fabs z1.d, p0/m, z1.d
3858 ; CHECK-NEXT: stp q0, q1, [x0]
3861 ; NONEON-NOSVE-LABEL: fabs_v4f64:
3862 ; NONEON-NOSVE: // %bb.0:
3863 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
3864 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
3865 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
3866 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
3867 ; NONEON-NOSVE-NEXT: fabs d1, d0
3868 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
3869 ; NONEON-NOSVE-NEXT: fabs d0, d0
3870 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
3871 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
3872 ; NONEON-NOSVE-NEXT: fabs d1, d0
3873 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
3874 ; NONEON-NOSVE-NEXT: fabs d0, d0
3875 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
3876 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
3877 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
3878 ; NONEON-NOSVE-NEXT: add sp, sp, #64
3879 ; NONEON-NOSVE-NEXT: ret
3880 %op = load <4 x double>, ptr %a
3881 %res = call <4 x double> @llvm.fabs.v4f64(<4 x double> %op)
3882 store <4 x double> %res, ptr %a
; Intrinsic declarations, grouped by operation. Each group lists the same nine
; fixed-width vector types: half x {2,4,8,16}, float x {2,4,8}, double x {2,4}.

; llvm.fma: three vector operands of the same type.
; NOTE(review): presumably referenced by fma tests elsewhere in this file - confirm.
3886 declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
3887 declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
3888 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
3889 declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
3890 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
3891 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
3892 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
3893 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
3894 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
; llvm.sqrt: one vector operand.
; NOTE(review): presumably referenced by sqrt tests elsewhere in this file - confirm.
3896 declare <2 x half> @llvm.sqrt.v2f16(<2 x half>)
3897 declare <4 x half> @llvm.sqrt.v4f16(<4 x half>)
3898 declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
3899 declare <16 x half> @llvm.sqrt.v16f16(<16 x half>)
3900 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
3901 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
3902 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
3903 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
3904 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
; llvm.fabs: one vector operand; these are the callees of the fabs_v* test
; functions in this file.
3906 declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
3907 declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
3908 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
3909 declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
3910 declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
3911 declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
3912 declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
3913 declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
3914 declare <4 x double> @llvm.fabs.v4f64(<4 x double>)