1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5 target triple = "aarch64-unknown-linux-gnu"
; fma_v4f16: fmul contract followed by fadd contract on <4 x half> operands
; passed in registers.
; With +sve the expected lowering is a single predicated fmad on .h lanes under
; a vl4 predicate. Under the NONEON-NOSVE prefix the operands are spilled to a
; 32-byte stack area and each lane is handled as a scalar: widen with fcvt,
; fmul, round the product back through half precision, then fadd the addend.
11 define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) {
12 ; CHECK-LABEL: fma_v4f16:
14 ; CHECK-NEXT: ptrue p0.h, vl4
15 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
16 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
17 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
18 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
19 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
22 ; NONEON-NOSVE-LABEL: fma_v4f16:
23 ; NONEON-NOSVE: // %bb.0:
24 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-32]!
25 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
26 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
27 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
28 ; NONEON-NOSVE-NEXT: str d2, [sp, #16]
29 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #22]
30 ; NONEON-NOSVE-NEXT: ldr h6, [sp, #12]
31 ; NONEON-NOSVE-NEXT: ldr h7, [sp, #4]
32 ; NONEON-NOSVE-NEXT: fcvt s0, h0
33 ; NONEON-NOSVE-NEXT: fcvt s1, h1
34 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #10]
35 ; NONEON-NOSVE-NEXT: fcvt s2, h2
36 ; NONEON-NOSVE-NEXT: fcvt s6, h6
37 ; NONEON-NOSVE-NEXT: fcvt s7, h7
38 ; NONEON-NOSVE-NEXT: ldr h5, [sp, #2]
39 ; NONEON-NOSVE-NEXT: fcvt s4, h4
40 ; NONEON-NOSVE-NEXT: ldr h3, [sp]
41 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
42 ; NONEON-NOSVE-NEXT: fcvt s5, h5
43 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
44 ; NONEON-NOSVE-NEXT: fcvt s3, h3
45 ; NONEON-NOSVE-NEXT: fcvt s1, h1
46 ; NONEON-NOSVE-NEXT: fcvt h0, s0
47 ; NONEON-NOSVE-NEXT: fmul s1, s3, s1
48 ; NONEON-NOSVE-NEXT: fcvt s0, h0
49 ; NONEON-NOSVE-NEXT: fcvt h1, s1
50 ; NONEON-NOSVE-NEXT: fadd s0, s0, s2
51 ; NONEON-NOSVE-NEXT: fmul s2, s7, s6
52 ; NONEON-NOSVE-NEXT: fcvt s1, h1
53 ; NONEON-NOSVE-NEXT: fcvt h0, s0
54 ; NONEON-NOSVE-NEXT: fcvt h2, s2
55 ; NONEON-NOSVE-NEXT: str h0, [sp, #30]
56 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
57 ; NONEON-NOSVE-NEXT: fcvt s2, h2
58 ; NONEON-NOSVE-NEXT: fcvt s0, h0
59 ; NONEON-NOSVE-NEXT: fadd s0, s2, s0
60 ; NONEON-NOSVE-NEXT: fmul s2, s5, s4
61 ; NONEON-NOSVE-NEXT: fcvt h0, s0
62 ; NONEON-NOSVE-NEXT: fcvt h2, s2
63 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
64 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
65 ; NONEON-NOSVE-NEXT: fcvt s2, h2
66 ; NONEON-NOSVE-NEXT: fcvt s0, h0
67 ; NONEON-NOSVE-NEXT: fadd s0, s2, s0
68 ; NONEON-NOSVE-NEXT: fcvt h0, s0
69 ; NONEON-NOSVE-NEXT: str h0, [sp, #26]
70 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
71 ; NONEON-NOSVE-NEXT: fcvt s0, h0
72 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
73 ; NONEON-NOSVE-NEXT: fcvt h0, s0
74 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
75 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
76 ; NONEON-NOSVE-NEXT: add sp, sp, #32
77 ; NONEON-NOSVE-NEXT: ret
78 %mul = fmul contract <4 x half> %op1, %op2
79 %res = fadd contract <4 x half> %mul, %op3
; fma_v8f16: fmul contract followed by fadd contract on <8 x half> operands
; passed in registers.
; With +sve the expected lowering is a single predicated fmad on .h lanes under
; a vl8 predicate. Under the NONEON-NOSVE prefix the three q-register operands
; are written to a 64-byte stack area and the eight lanes are processed as
; scalars (fcvt to single, fmul, round-trip through half, fadd), with the
; result vector reassembled on the stack and reloaded into q0.
83 define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) {
84 ; CHECK-LABEL: fma_v8f16:
86 ; CHECK-NEXT: ptrue p0.h, vl8
87 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
88 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
89 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
90 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
91 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
94 ; NONEON-NOSVE-LABEL: fma_v8f16:
95 ; NONEON-NOSVE: // %bb.0:
96 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]!
97 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
98 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
99 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
100 ; NONEON-NOSVE-NEXT: str q2, [sp, #32]
101 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #46]
102 ; NONEON-NOSVE-NEXT: ldr h22, [sp, #28]
103 ; NONEON-NOSVE-NEXT: ldr h23, [sp, #12]
104 ; NONEON-NOSVE-NEXT: fcvt s3, h0
105 ; NONEON-NOSVE-NEXT: fcvt s1, h1
106 ; NONEON-NOSVE-NEXT: ldr h20, [sp, #26]
107 ; NONEON-NOSVE-NEXT: fcvt s2, h2
108 ; NONEON-NOSVE-NEXT: fcvt s22, h22
109 ; NONEON-NOSVE-NEXT: fcvt s23, h23
110 ; NONEON-NOSVE-NEXT: ldr h21, [sp, #10]
111 ; NONEON-NOSVE-NEXT: fcvt s20, h20
112 ; NONEON-NOSVE-NEXT: ldr h18, [sp, #24]
113 ; NONEON-NOSVE-NEXT: ldr h19, [sp, #8]
114 ; NONEON-NOSVE-NEXT: ldr h16, [sp, #22]
115 ; NONEON-NOSVE-NEXT: ldr h17, [sp, #6]
116 ; NONEON-NOSVE-NEXT: fmul s5, s1, s3
117 ; NONEON-NOSVE-NEXT: fcvt s21, h21
118 ; NONEON-NOSVE-NEXT: fcvt s18, h18
119 ; NONEON-NOSVE-NEXT: fcvt s19, h19
120 ; NONEON-NOSVE-NEXT: fcvt s16, h16
121 ; NONEON-NOSVE-NEXT: fcvt s17, h17
122 ; NONEON-NOSVE-NEXT: ldr h6, [sp, #20]
123 ; NONEON-NOSVE-NEXT: ldr h7, [sp, #4]
124 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #18]
125 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #2]
126 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
127 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
128 ; NONEON-NOSVE-NEXT: fcvt h5, s5
129 ; NONEON-NOSVE-NEXT: fcvt s6, h6
130 ; NONEON-NOSVE-NEXT: fcvt s7, h7
131 ; NONEON-NOSVE-NEXT: fcvt s3, h3
132 ; NONEON-NOSVE-NEXT: fcvt s4, h4
133 ; NONEON-NOSVE-NEXT: fcvt s0, h0
134 ; NONEON-NOSVE-NEXT: fcvt s1, h1
135 ; NONEON-NOSVE-NEXT: fcvt s5, h5
136 ; NONEON-NOSVE-NEXT: fmul s3, s4, s3
137 ; NONEON-NOSVE-NEXT: fmul s0, s1, s0
138 ; NONEON-NOSVE-NEXT: fadd s2, s5, s2
139 ; NONEON-NOSVE-NEXT: fmul s5, s23, s22
140 ; NONEON-NOSVE-NEXT: fcvt h3, s3
141 ; NONEON-NOSVE-NEXT: fcvt h0, s0
142 ; NONEON-NOSVE-NEXT: fcvt h2, s2
143 ; NONEON-NOSVE-NEXT: fcvt h5, s5
144 ; NONEON-NOSVE-NEXT: fcvt s3, h3
145 ; NONEON-NOSVE-NEXT: fcvt s0, h0
146 ; NONEON-NOSVE-NEXT: str h2, [sp, #62]
147 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #44]
148 ; NONEON-NOSVE-NEXT: fcvt s5, h5
149 ; NONEON-NOSVE-NEXT: fcvt s2, h2
150 ; NONEON-NOSVE-NEXT: fadd s2, s5, s2
151 ; NONEON-NOSVE-NEXT: fmul s5, s21, s20
152 ; NONEON-NOSVE-NEXT: fcvt h2, s2
153 ; NONEON-NOSVE-NEXT: fcvt h5, s5
154 ; NONEON-NOSVE-NEXT: str h2, [sp, #60]
155 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #42]
156 ; NONEON-NOSVE-NEXT: fcvt s5, h5
157 ; NONEON-NOSVE-NEXT: fcvt s2, h2
158 ; NONEON-NOSVE-NEXT: fadd s2, s5, s2
159 ; NONEON-NOSVE-NEXT: fmul s5, s19, s18
160 ; NONEON-NOSVE-NEXT: fcvt h2, s2
161 ; NONEON-NOSVE-NEXT: fcvt h5, s5
162 ; NONEON-NOSVE-NEXT: str h2, [sp, #58]
163 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #40]
164 ; NONEON-NOSVE-NEXT: fcvt s5, h5
165 ; NONEON-NOSVE-NEXT: fcvt s2, h2
166 ; NONEON-NOSVE-NEXT: fadd s2, s5, s2
167 ; NONEON-NOSVE-NEXT: fmul s5, s17, s16
168 ; NONEON-NOSVE-NEXT: fcvt h2, s2
169 ; NONEON-NOSVE-NEXT: fcvt h5, s5
170 ; NONEON-NOSVE-NEXT: str h2, [sp, #56]
171 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #38]
172 ; NONEON-NOSVE-NEXT: fcvt s5, h5
173 ; NONEON-NOSVE-NEXT: fcvt s2, h2
174 ; NONEON-NOSVE-NEXT: fadd s2, s5, s2
175 ; NONEON-NOSVE-NEXT: fmul s5, s7, s6
176 ; NONEON-NOSVE-NEXT: fcvt h2, s2
177 ; NONEON-NOSVE-NEXT: fcvt h5, s5
178 ; NONEON-NOSVE-NEXT: str h2, [sp, #54]
179 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #36]
180 ; NONEON-NOSVE-NEXT: fcvt s5, h5
181 ; NONEON-NOSVE-NEXT: fcvt s2, h2
182 ; NONEON-NOSVE-NEXT: fadd s2, s5, s2
183 ; NONEON-NOSVE-NEXT: fcvt h2, s2
184 ; NONEON-NOSVE-NEXT: str h2, [sp, #52]
185 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #34]
186 ; NONEON-NOSVE-NEXT: fcvt s2, h2
187 ; NONEON-NOSVE-NEXT: fadd s2, s3, s2
188 ; NONEON-NOSVE-NEXT: fcvt h1, s2
189 ; NONEON-NOSVE-NEXT: str h1, [sp, #50]
190 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
191 ; NONEON-NOSVE-NEXT: fcvt s1, h1
192 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
193 ; NONEON-NOSVE-NEXT: fcvt h0, s0
194 ; NONEON-NOSVE-NEXT: str h0, [sp, #48]
195 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
196 ; NONEON-NOSVE-NEXT: add sp, sp, #64
197 ; NONEON-NOSVE-NEXT: ret
198 %mul = fmul contract <8 x half> %op1, %op2
199 %res = fadd contract <8 x half> %mul, %op3
; fma_v16f16: loads <16 x half> from %a, %b and %c, computes
; fmul contract (%a * %b) then fadd contract with %c, and stores the result
; back to %a.
; With +sve the 256-bit vectors are expected to be handled as two q-register
; halves (ldp/stp) under one vl8 predicate: one half via fmad, the other via
; movprfx + fmla. Under the NONEON-NOSVE prefix a 208-byte frame is expected,
; d8-d15 are spilled and reloaded, one half-precision temporary is spilled to
; [sp, #14], and every lane is computed as a scalar (fcvt, fmul, round-trip
; through half, fadd) before the result is reloaded as q0/q1 and stored to x0.
203 define void @fma_v16f16(ptr %a, ptr %b, ptr %c) {
204 ; CHECK-LABEL: fma_v16f16:
206 ; CHECK-NEXT: ldp q0, q4, [x1]
207 ; CHECK-NEXT: ptrue p0.h, vl8
208 ; CHECK-NEXT: ldp q1, q5, [x2]
209 ; CHECK-NEXT: ldp q2, q3, [x0]
210 ; CHECK-NEXT: fmad z0.h, p0/m, z2.h, z1.h
211 ; CHECK-NEXT: movprfx z1, z5
212 ; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h
213 ; CHECK-NEXT: stp q0, q1, [x0]
216 ; NONEON-NOSVE-LABEL: fma_v16f16:
217 ; NONEON-NOSVE: // %bb.0:
218 ; NONEON-NOSVE-NEXT: sub sp, sp, #208
219 ; NONEON-NOSVE-NEXT: stp d15, d14, [sp, #144] // 16-byte Folded Spill
220 ; NONEON-NOSVE-NEXT: stp d13, d12, [sp, #160] // 16-byte Folded Spill
221 ; NONEON-NOSVE-NEXT: stp d11, d10, [sp, #176] // 16-byte Folded Spill
222 ; NONEON-NOSVE-NEXT: stp d9, d8, [sp, #192] // 16-byte Folded Spill
223 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 208
224 ; NONEON-NOSVE-NEXT: .cfi_offset b8, -8
225 ; NONEON-NOSVE-NEXT: .cfi_offset b9, -16
226 ; NONEON-NOSVE-NEXT: .cfi_offset b10, -24
227 ; NONEON-NOSVE-NEXT: .cfi_offset b11, -32
228 ; NONEON-NOSVE-NEXT: .cfi_offset b12, -40
229 ; NONEON-NOSVE-NEXT: .cfi_offset b13, -48
230 ; NONEON-NOSVE-NEXT: .cfi_offset b14, -56
231 ; NONEON-NOSVE-NEXT: .cfi_offset b15, -64
232 ; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
233 ; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
234 ; NONEON-NOSVE-NEXT: ldp q18, q19, [x2]
235 ; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #16]
236 ; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #64]
237 ; NONEON-NOSVE-NEXT: ldr h24, [sp, #46]
238 ; NONEON-NOSVE-NEXT: ldr h25, [sp, #30]
239 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #94]
240 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #78]
241 ; NONEON-NOSVE-NEXT: str q19, [sp, #96]
242 ; NONEON-NOSVE-NEXT: str q18, [sp, #48]
243 ; NONEON-NOSVE-NEXT: ldr h18, [sp, #110]
244 ; NONEON-NOSVE-NEXT: ldr h15, [sp, #92]
245 ; NONEON-NOSVE-NEXT: fcvt s20, h0
246 ; NONEON-NOSVE-NEXT: fcvt s21, h1
247 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
248 ; NONEON-NOSVE-NEXT: fcvt s18, h18
249 ; NONEON-NOSVE-NEXT: ldr h13, [sp, #90]
250 ; NONEON-NOSVE-NEXT: ldr h14, [sp, #74]
251 ; NONEON-NOSVE-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
252 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #76]
253 ; NONEON-NOSVE-NEXT: ldr h11, [sp, #88]
254 ; NONEON-NOSVE-NEXT: ldr h12, [sp, #72]
255 ; NONEON-NOSVE-NEXT: ldr h9, [sp, #86]
256 ; NONEON-NOSVE-NEXT: ldr h10, [sp, #70]
257 ; NONEON-NOSVE-NEXT: fmul s30, s21, s20
258 ; NONEON-NOSVE-NEXT: fcvt s0, h0
259 ; NONEON-NOSVE-NEXT: ldr h31, [sp, #84]
260 ; NONEON-NOSVE-NEXT: ldr h8, [sp, #68]
261 ; NONEON-NOSVE-NEXT: ldr h28, [sp, #82]
262 ; NONEON-NOSVE-NEXT: ldr h29, [sp, #66]
263 ; NONEON-NOSVE-NEXT: ldr h26, [sp, #80]
264 ; NONEON-NOSVE-NEXT: ldr h27, [sp, #64]
265 ; NONEON-NOSVE-NEXT: ldr h22, [sp, #44]
266 ; NONEON-NOSVE-NEXT: ldr h23, [sp, #28]
267 ; NONEON-NOSVE-NEXT: ldr h20, [sp, #42]
268 ; NONEON-NOSVE-NEXT: ldr h21, [sp, #26]
269 ; NONEON-NOSVE-NEXT: fcvt h19, s30
270 ; NONEON-NOSVE-NEXT: fcvt s30, h15
271 ; NONEON-NOSVE-NEXT: ldr h16, [sp, #40]
272 ; NONEON-NOSVE-NEXT: ldr h17, [sp, #24]
273 ; NONEON-NOSVE-NEXT: ldr h6, [sp, #38]
274 ; NONEON-NOSVE-NEXT: ldr h7, [sp, #22]
275 ; NONEON-NOSVE-NEXT: fcvt s16, h16
276 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #36]
277 ; NONEON-NOSVE-NEXT: ldr h5, [sp, #20]
278 ; NONEON-NOSVE-NEXT: fcvt s17, h17
279 ; NONEON-NOSVE-NEXT: fcvt s6, h6
280 ; NONEON-NOSVE-NEXT: fcvt s7, h7
281 ; NONEON-NOSVE-NEXT: fcvt s19, h19
282 ; NONEON-NOSVE-NEXT: fmul s0, s0, s30
283 ; NONEON-NOSVE-NEXT: fcvt s30, h14
284 ; NONEON-NOSVE-NEXT: fcvt s4, h4
285 ; NONEON-NOSVE-NEXT: fcvt s5, h5
286 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #34]
287 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #18]
288 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #16]
289 ; NONEON-NOSVE-NEXT: fmul s16, s17, s16
290 ; NONEON-NOSVE-NEXT: fmul s6, s7, s6
291 ; NONEON-NOSVE-NEXT: fcvt s2, h2
292 ; NONEON-NOSVE-NEXT: fadd s18, s19, s18
293 ; NONEON-NOSVE-NEXT: fcvt h0, s0
294 ; NONEON-NOSVE-NEXT: fcvt s19, h13
295 ; NONEON-NOSVE-NEXT: fmul s4, s5, s4
296 ; NONEON-NOSVE-NEXT: fcvt s3, h3
297 ; NONEON-NOSVE-NEXT: fcvt s1, h1
298 ; NONEON-NOSVE-NEXT: ldp d15, d14, [sp, #144] // 16-byte Folded Reload
299 ; NONEON-NOSVE-NEXT: fcvt h16, s16
300 ; NONEON-NOSVE-NEXT: fcvt h6, s6
301 ; NONEON-NOSVE-NEXT: fcvt h18, s18
302 ; NONEON-NOSVE-NEXT: fcvt s0, h0
303 ; NONEON-NOSVE-NEXT: fcvt h4, s4
304 ; NONEON-NOSVE-NEXT: fmul s2, s3, s2
305 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #14] // 2-byte Folded Reload
306 ; NONEON-NOSVE-NEXT: fcvt s16, h16
307 ; NONEON-NOSVE-NEXT: fcvt s6, h6
308 ; NONEON-NOSVE-NEXT: fcvt s3, h3
309 ; NONEON-NOSVE-NEXT: str h18, [sp, #142]
310 ; NONEON-NOSVE-NEXT: ldr h18, [sp, #108]
311 ; NONEON-NOSVE-NEXT: fcvt s4, h4
312 ; NONEON-NOSVE-NEXT: fcvt h2, s2
313 ; NONEON-NOSVE-NEXT: fcvt s18, h18
314 ; NONEON-NOSVE-NEXT: fmul s1, s1, s3
315 ; NONEON-NOSVE-NEXT: fcvt s2, h2
316 ; NONEON-NOSVE-NEXT: fadd s0, s0, s18
317 ; NONEON-NOSVE-NEXT: fmul s18, s30, s19
318 ; NONEON-NOSVE-NEXT: fcvt s19, h11
319 ; NONEON-NOSVE-NEXT: fcvt s30, h12
320 ; NONEON-NOSVE-NEXT: fcvt h1, s1
321 ; NONEON-NOSVE-NEXT: ldp d13, d12, [sp, #160] // 16-byte Folded Reload
322 ; NONEON-NOSVE-NEXT: fcvt h0, s0
323 ; NONEON-NOSVE-NEXT: fcvt h18, s18
324 ; NONEON-NOSVE-NEXT: fcvt s1, h1
325 ; NONEON-NOSVE-NEXT: str h0, [sp, #140]
326 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #106]
327 ; NONEON-NOSVE-NEXT: fcvt s18, h18
328 ; NONEON-NOSVE-NEXT: fcvt s0, h0
329 ; NONEON-NOSVE-NEXT: fadd s0, s18, s0
330 ; NONEON-NOSVE-NEXT: fmul s18, s30, s19
331 ; NONEON-NOSVE-NEXT: fcvt s19, h9
332 ; NONEON-NOSVE-NEXT: fcvt s30, h10
333 ; NONEON-NOSVE-NEXT: ldp d11, d10, [sp, #176] // 16-byte Folded Reload
334 ; NONEON-NOSVE-NEXT: fcvt h0, s0
335 ; NONEON-NOSVE-NEXT: fcvt h18, s18
336 ; NONEON-NOSVE-NEXT: str h0, [sp, #138]
337 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #104]
338 ; NONEON-NOSVE-NEXT: fcvt s18, h18
339 ; NONEON-NOSVE-NEXT: fcvt s0, h0
340 ; NONEON-NOSVE-NEXT: fadd s0, s18, s0
341 ; NONEON-NOSVE-NEXT: fmul s18, s30, s19
342 ; NONEON-NOSVE-NEXT: fcvt s19, h31
343 ; NONEON-NOSVE-NEXT: fcvt s30, h8
344 ; NONEON-NOSVE-NEXT: ldp d9, d8, [sp, #192] // 16-byte Folded Reload
345 ; NONEON-NOSVE-NEXT: fcvt h0, s0
346 ; NONEON-NOSVE-NEXT: fcvt h18, s18
347 ; NONEON-NOSVE-NEXT: str h0, [sp, #136]
348 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #102]
349 ; NONEON-NOSVE-NEXT: fcvt s18, h18
350 ; NONEON-NOSVE-NEXT: fcvt s0, h0
351 ; NONEON-NOSVE-NEXT: fadd s0, s18, s0
352 ; NONEON-NOSVE-NEXT: fmul s18, s30, s19
353 ; NONEON-NOSVE-NEXT: fcvt s19, h28
354 ; NONEON-NOSVE-NEXT: fcvt s28, h29
355 ; NONEON-NOSVE-NEXT: fcvt h0, s0
356 ; NONEON-NOSVE-NEXT: fcvt h18, s18
357 ; NONEON-NOSVE-NEXT: str h0, [sp, #134]
358 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #100]
359 ; NONEON-NOSVE-NEXT: fcvt s18, h18
360 ; NONEON-NOSVE-NEXT: fcvt s0, h0
361 ; NONEON-NOSVE-NEXT: fadd s0, s18, s0
362 ; NONEON-NOSVE-NEXT: fmul s18, s28, s19
363 ; NONEON-NOSVE-NEXT: fcvt s19, h26
364 ; NONEON-NOSVE-NEXT: fcvt s26, h27
365 ; NONEON-NOSVE-NEXT: fcvt h0, s0
366 ; NONEON-NOSVE-NEXT: fcvt h18, s18
367 ; NONEON-NOSVE-NEXT: str h0, [sp, #132]
368 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #98]
369 ; NONEON-NOSVE-NEXT: fcvt s18, h18
370 ; NONEON-NOSVE-NEXT: fcvt s0, h0
371 ; NONEON-NOSVE-NEXT: fadd s0, s18, s0
372 ; NONEON-NOSVE-NEXT: fmul s18, s26, s19
373 ; NONEON-NOSVE-NEXT: fcvt s19, h24
374 ; NONEON-NOSVE-NEXT: fcvt s24, h25
375 ; NONEON-NOSVE-NEXT: fcvt h0, s0
376 ; NONEON-NOSVE-NEXT: fcvt h18, s18
377 ; NONEON-NOSVE-NEXT: str h0, [sp, #130]
378 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #96]
379 ; NONEON-NOSVE-NEXT: fcvt s18, h18
380 ; NONEON-NOSVE-NEXT: fcvt s0, h0
381 ; NONEON-NOSVE-NEXT: fadd s0, s18, s0
382 ; NONEON-NOSVE-NEXT: fmul s18, s24, s19
383 ; NONEON-NOSVE-NEXT: fcvt s19, h22
384 ; NONEON-NOSVE-NEXT: fcvt s22, h23
385 ; NONEON-NOSVE-NEXT: fcvt h0, s0
386 ; NONEON-NOSVE-NEXT: fcvt h18, s18
387 ; NONEON-NOSVE-NEXT: str h0, [sp, #128]
388 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
389 ; NONEON-NOSVE-NEXT: fcvt s18, h18
390 ; NONEON-NOSVE-NEXT: fcvt s0, h0
391 ; NONEON-NOSVE-NEXT: fadd s0, s18, s0
392 ; NONEON-NOSVE-NEXT: fmul s18, s22, s19
393 ; NONEON-NOSVE-NEXT: fcvt s19, h20
394 ; NONEON-NOSVE-NEXT: fcvt s20, h21
395 ; NONEON-NOSVE-NEXT: fcvt h0, s0
396 ; NONEON-NOSVE-NEXT: fcvt h18, s18
397 ; NONEON-NOSVE-NEXT: str h0, [sp, #126]
398 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
399 ; NONEON-NOSVE-NEXT: fcvt s18, h18
400 ; NONEON-NOSVE-NEXT: fcvt s0, h0
401 ; NONEON-NOSVE-NEXT: fadd s0, s18, s0
402 ; NONEON-NOSVE-NEXT: fmul s18, s20, s19
403 ; NONEON-NOSVE-NEXT: fcvt h0, s0
404 ; NONEON-NOSVE-NEXT: fcvt h18, s18
405 ; NONEON-NOSVE-NEXT: str h0, [sp, #124]
406 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
407 ; NONEON-NOSVE-NEXT: fcvt s18, h18
408 ; NONEON-NOSVE-NEXT: fcvt s0, h0
409 ; NONEON-NOSVE-NEXT: fadd s0, s18, s0
410 ; NONEON-NOSVE-NEXT: fcvt h0, s0
411 ; NONEON-NOSVE-NEXT: str h0, [sp, #122]
412 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
413 ; NONEON-NOSVE-NEXT: fcvt s0, h0
414 ; NONEON-NOSVE-NEXT: fadd s0, s16, s0
415 ; NONEON-NOSVE-NEXT: fcvt h0, s0
416 ; NONEON-NOSVE-NEXT: str h0, [sp, #120]
417 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
418 ; NONEON-NOSVE-NEXT: fcvt s0, h0
419 ; NONEON-NOSVE-NEXT: fadd s0, s6, s0
420 ; NONEON-NOSVE-NEXT: fcvt h0, s0
421 ; NONEON-NOSVE-NEXT: str h0, [sp, #118]
422 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
423 ; NONEON-NOSVE-NEXT: fcvt s0, h0
424 ; NONEON-NOSVE-NEXT: fadd s0, s4, s0
425 ; NONEON-NOSVE-NEXT: fcvt h0, s0
426 ; NONEON-NOSVE-NEXT: str h0, [sp, #116]
427 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
428 ; NONEON-NOSVE-NEXT: fcvt s0, h0
429 ; NONEON-NOSVE-NEXT: fadd s0, s2, s0
430 ; NONEON-NOSVE-NEXT: fcvt h0, s0
431 ; NONEON-NOSVE-NEXT: str h0, [sp, #114]
432 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
433 ; NONEON-NOSVE-NEXT: fcvt s0, h0
434 ; NONEON-NOSVE-NEXT: fadd s0, s1, s0
435 ; NONEON-NOSVE-NEXT: fcvt h0, s0
436 ; NONEON-NOSVE-NEXT: str h0, [sp, #112]
437 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #112]
438 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
439 ; NONEON-NOSVE-NEXT: add sp, sp, #208
440 ; NONEON-NOSVE-NEXT: ret
441 %op1 = load <16 x half>, ptr %a
442 %op2 = load <16 x half>, ptr %b
443 %op3 = load <16 x half>, ptr %c
444 %mul = fmul contract <16 x half> %op1, %op2
445 %res = fadd contract <16 x half> %mul, %op3
446 store <16 x half> %res, ptr %a
; fma_v2f32: fmul contract followed by fadd contract on <2 x float> operands
; passed in registers.
; With +sve the expected lowering is a single predicated fmad on .s lanes under
; a vl2 predicate. Under the NONEON-NOSVE prefix the operands are stored to a
; 32-byte stack area and the two lanes are each computed with a scalar fmadd,
; then the result is reloaded as d0.
450 define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) {
451 ; CHECK-LABEL: fma_v2f32:
453 ; CHECK-NEXT: ptrue p0.s, vl2
454 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
455 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
456 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
457 ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
458 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
461 ; NONEON-NOSVE-LABEL: fma_v2f32:
462 ; NONEON-NOSVE: // %bb.0:
463 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
464 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
465 ; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8]
466 ; NONEON-NOSVE-NEXT: str d0, [sp]
467 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #8]
468 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp]
469 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
470 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
471 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
472 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
473 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #24]
474 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
475 ; NONEON-NOSVE-NEXT: add sp, sp, #32
476 ; NONEON-NOSVE-NEXT: ret
477 %mul = fmul contract <2 x float> %op1, %op2
478 %res = fadd contract <2 x float> %mul, %op3
; fma_v4f32: fmul contract followed by fadd contract on <4 x float> operands
; passed in registers.
; With +sve the expected lowering is a single predicated fmad on .s lanes under
; a vl4 predicate. Under the NONEON-NOSVE prefix the operands are stored to a
; 64-byte stack area and the four lanes are each computed with a scalar fmadd,
; then the result is reloaded as q0.
482 define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) {
483 ; CHECK-LABEL: fma_v4f32:
485 ; CHECK-NEXT: ptrue p0.s, vl4
486 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
487 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
488 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
489 ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
490 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
493 ; NONEON-NOSVE-LABEL: fma_v4f32:
494 ; NONEON-NOSVE: // %bb.0:
495 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
496 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
497 ; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16]
498 ; NONEON-NOSVE-NEXT: str q0, [sp]
499 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24]
500 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
501 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #44]
502 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
503 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #40]
504 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
505 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16]
506 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp]
507 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #56]
508 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #36]
509 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
510 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #32]
511 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
512 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #48]
513 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
514 ; NONEON-NOSVE-NEXT: add sp, sp, #64
515 ; NONEON-NOSVE-NEXT: ret
516 %mul = fmul contract <4 x float> %op1, %op2
517 %res = fadd contract <4 x float> %mul, %op3
; fma_v8f32: loads <8 x float> from %a, %b and %c, computes
; fmul contract (%a * %b) then fadd contract with %c, and stores the result
; back to %a.
; With +sve the 256-bit vectors are expected to be handled as two q-register
; halves (ldp/stp) under one vl4 predicate: one half via fmad, the other via
; movprfx + fmla. Under the NONEON-NOSVE prefix the operands are copied to a
; 128-byte stack area, the eight lanes are each computed with a scalar fmadd,
; and the result is reloaded as q0/q1 and stored to x0.
521 define void @fma_v8f32(ptr %a, ptr %b, ptr %c) {
522 ; CHECK-LABEL: fma_v8f32:
524 ; CHECK-NEXT: ldp q0, q4, [x1]
525 ; CHECK-NEXT: ptrue p0.s, vl4
526 ; CHECK-NEXT: ldp q1, q5, [x2]
527 ; CHECK-NEXT: ldp q2, q3, [x0]
528 ; CHECK-NEXT: fmad z0.s, p0/m, z2.s, z1.s
529 ; CHECK-NEXT: movprfx z1, z5
530 ; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s
531 ; CHECK-NEXT: stp q0, q1, [x0]
534 ; NONEON-NOSVE-LABEL: fma_v8f32:
535 ; NONEON-NOSVE: // %bb.0:
536 ; NONEON-NOSVE-NEXT: sub sp, sp, #128
537 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
538 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x2]
539 ; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
540 ; NONEON-NOSVE-NEXT: ldp q4, q5, [x0]
541 ; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64]
542 ; NONEON-NOSVE-NEXT: stp q4, q2, [sp]
543 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #92]
544 ; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32]
545 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #72]
546 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #56]
547 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
548 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #88]
549 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
550 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #64]
551 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #48]
552 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #120]
553 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #84]
554 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
555 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #80]
556 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
557 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24]
558 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
559 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #112]
560 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #44]
561 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
562 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #40]
563 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
564 ; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16]
565 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp]
566 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #104]
567 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #36]
568 ; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
569 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #32]
570 ; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
571 ; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #96]
572 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
573 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
574 ; NONEON-NOSVE-NEXT: add sp, sp, #128
575 ; NONEON-NOSVE-NEXT: ret
576 %op1 = load <8 x float>, ptr %a
577 %op2 = load <8 x float>, ptr %b
578 %op3 = load <8 x float>, ptr %c
579 %mul = fmul contract <8 x float> %op1, %op2
580 %res = fadd contract <8 x float> %mul, %op3
581 store <8 x float> %res, ptr %a
; fma_v1f64: fmul contract followed by fadd contract on <1 x double> operands
; passed in registers.
; Both configurations expect the single-element case to become one scalar
; fmadd on d registers; no SVE predicate is expected with +sve. Under the
; NONEON-NOSVE prefix the result is additionally stored to and reloaded from a
; 16-byte stack area before returning.
585 define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double> %op3) {
586 ; CHECK-LABEL: fma_v1f64:
588 ; CHECK-NEXT: fmadd d0, d0, d1, d2
591 ; NONEON-NOSVE-LABEL: fma_v1f64:
592 ; NONEON-NOSVE: // %bb.0:
593 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
594 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
595 ; NONEON-NOSVE-NEXT: fmadd d0, d0, d1, d2
596 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
597 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
598 ; NONEON-NOSVE-NEXT: add sp, sp, #16
599 ; NONEON-NOSVE-NEXT: ret
600 %mul = fmul contract <1 x double> %op1, %op2
601 %res = fadd contract <1 x double> %mul, %op3
602 ret <1 x double> %res
; fma_v2f64: fmul contract followed by fadd contract on <2 x double> operands
; passed in registers.
; With +sve the expected lowering is a single predicated fmad on .d lanes under
; a vl2 predicate. Under the NONEON-NOSVE prefix the operands are stored to a
; 64-byte stack area and the two lanes are each computed with a scalar fmadd,
; then the result is reloaded as q0.
605 define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) {
606 ; CHECK-LABEL: fma_v2f64:
608 ; CHECK-NEXT: ptrue p0.d, vl2
609 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
610 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
611 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
612 ; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
613 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
616 ; NONEON-NOSVE-LABEL: fma_v2f64:
617 ; NONEON-NOSVE: // %bb.0:
618 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
619 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
620 ; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16]
621 ; NONEON-NOSVE-NEXT: str q0, [sp]
622 ; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16]
623 ; NONEON-NOSVE-NEXT: ldp d2, d4, [sp]
624 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
625 ; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0
626 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
627 ; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
628 ; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #48]
629 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
630 ; NONEON-NOSVE-NEXT: add sp, sp, #64
631 ; NONEON-NOSVE-NEXT: ret
632 %mul = fmul contract <2 x double> %op1, %op2
633 %res = fadd contract <2 x double> %mul, %op3
634 ret <2 x double> %res
; fma_v4f64: loads <4 x double> from %a, %b and %c, computes
; fmul contract (%a * %b) then fadd contract with %c, and stores the result
; back to %a.
; With +sve the 256-bit vectors are expected to be handled as two q-register
; halves (ldp/stp) under one vl2 predicate: one half via fmad, the other via
; movprfx + fmla. Under the NONEON-NOSVE prefix the operands are copied to a
; 128-byte stack area, the four lanes are each computed with a scalar fmadd,
; and the result is reloaded as q0/q1 and stored to x0.
637 define void @fma_v4f64(ptr %a, ptr %b, ptr %c) {
638 ; CHECK-LABEL: fma_v4f64:
640 ; CHECK-NEXT: ldp q0, q4, [x1]
641 ; CHECK-NEXT: ptrue p0.d, vl2
642 ; CHECK-NEXT: ldp q1, q5, [x2]
643 ; CHECK-NEXT: ldp q2, q3, [x0]
644 ; CHECK-NEXT: fmad z0.d, p0/m, z2.d, z1.d
645 ; CHECK-NEXT: movprfx z1, z5
646 ; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d
647 ; CHECK-NEXT: stp q0, q1, [x0]
650 ; NONEON-NOSVE-LABEL: fma_v4f64:
651 ; NONEON-NOSVE: // %bb.0:
652 ; NONEON-NOSVE-NEXT: sub sp, sp, #128
653 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
654 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x2]
655 ; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
656 ; NONEON-NOSVE-NEXT: ldp q4, q5, [x0]
657 ; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64]
658 ; NONEON-NOSVE-NEXT: stp q4, q2, [sp]
659 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #88]
660 ; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32]
661 ; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #64]
662 ; NONEON-NOSVE-NEXT: ldp d2, d4, [sp, #48]
663 ; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0
664 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #80]
665 ; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
666 ; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16]
667 ; NONEON-NOSVE-NEXT: ldp d2, d4, [sp]
668 ; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #112]
669 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
670 ; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0
671 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
672 ; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
673 ; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #96]
674 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
675 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
676 ; NONEON-NOSVE-NEXT: add sp, sp, #128
677 ; NONEON-NOSVE-NEXT: ret
678 %op1 = load <4 x double>, ptr %a
679 %op2 = load <4 x double>, ptr %b
680 %op3 = load <4 x double>, ptr %c
681 %mul = fmul contract <4 x double> %op1, %op2
682 %res = fadd contract <4 x double> %mul, %op3
683 store <4 x double> %res, ptr %a