1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s \
3 ; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FISEL
4 ; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s \
5 ; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL
; External callees used by the tests below: @streaming_callee is declared with
; the "aarch64_pstate_sm_enabled" attribute, @normal_callee with no SME attributes.
8 declare double @streaming_callee(double) "aarch64_pstate_sm_enabled"
9 declare double @normal_callee(double)
; A caller with no SME attributes calls @streaming_callee, with
; "aarch64_pstate_sm_enabled" on the call site.
; Both RUN configurations expect the call to be bracketed by `smstart sm` /
; `smstop sm`, with d0 spilled before and reloaded after each mode switch and
; d8-d15 saved in the prologue. The FISEL and GISEL expectations differ only in
; how the constant operand of the fadd is materialised (constant-pool load vs.
; mov + fmov).
11 define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline optnone {
12 ; CHECK-FISEL-LABEL: nonstreaming_caller_streaming_callee:
13 ; CHECK-FISEL: // %bb.0: // %entry
14 ; CHECK-FISEL-NEXT: sub sp, sp, #96
15 ; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
16 ; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
17 ; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
18 ; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
19 ; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
20 ; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
21 ; CHECK-FISEL-NEXT: smstart sm
22 ; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
23 ; CHECK-FISEL-NEXT: bl streaming_callee
24 ; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
25 ; CHECK-FISEL-NEXT: smstop sm
26 ; CHECK-FISEL-NEXT: adrp x8, .LCPI0_0
27 ; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI0_0]
28 ; CHECK-FISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
29 ; CHECK-FISEL-NEXT: fadd d0, d1, d0
30 ; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
31 ; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
32 ; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
33 ; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
34 ; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
35 ; CHECK-FISEL-NEXT: add sp, sp, #96
36 ; CHECK-FISEL-NEXT: ret
38 ; CHECK-GISEL-LABEL: nonstreaming_caller_streaming_callee:
39 ; CHECK-GISEL: // %bb.0: // %entry
40 ; CHECK-GISEL-NEXT: sub sp, sp, #96
41 ; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
42 ; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
43 ; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
44 ; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
45 ; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
46 ; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
47 ; CHECK-GISEL-NEXT: smstart sm
48 ; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
49 ; CHECK-GISEL-NEXT: bl streaming_callee
50 ; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
51 ; CHECK-GISEL-NEXT: smstop sm
52 ; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
53 ; CHECK-GISEL-NEXT: fmov d0, x8
54 ; CHECK-GISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
55 ; CHECK-GISEL-NEXT: fadd d0, d1, d0
56 ; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
57 ; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
58 ; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
59 ; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
60 ; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
61 ; CHECK-GISEL-NEXT: add sp, sp, #96
62 ; CHECK-GISEL-NEXT: ret
64 %call = call double @streaming_callee(double %x) "aarch64_pstate_sm_enabled"
65 %add = fadd double %call, 4.200000e+01
; A caller marked "aarch64_pstate_sm_enabled" calls @normal_callee.
; The expected output is shared by both RUN configurations: `smstop sm` before
; the call and `smstart sm` after it, with d0 spilled and reloaded around each
; mode switch and d8-d15 saved in the prologue.
70 define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
71 ; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
72 ; CHECK-COMMON: // %bb.0: // %entry
73 ; CHECK-COMMON-NEXT: sub sp, sp, #96
74 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
75 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
76 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
77 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
78 ; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
79 ; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
80 ; CHECK-COMMON-NEXT: smstop sm
81 ; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
82 ; CHECK-COMMON-NEXT: bl normal_callee
83 ; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
84 ; CHECK-COMMON-NEXT: smstart sm
85 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
86 ; CHECK-COMMON-NEXT: fmov d0, x8
87 ; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
88 ; CHECK-COMMON-NEXT: fadd d0, d1, d0
89 ; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
90 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
91 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
92 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
93 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
94 ; CHECK-COMMON-NEXT: add sp, sp, #96
95 ; CHECK-COMMON-NEXT: ret
97 %call = call double @normal_callee(double %x)
98 %add = fadd double %call, 4.200000e+01
; A caller marked "aarch64_pstate_sm_body" calls @normal_callee.
; The expected output (shared by both RUN configurations) contains two nested
; mode-switch pairs: `smstart sm` after the prologue and `smstop sm` before the
; epilogue, plus `smstop sm` / `smstart sm` around the call itself. d0 is
; spilled and reloaded across every switch.
102 define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" {
103 ; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee:
104 ; CHECK-COMMON: // %bb.0:
105 ; CHECK-COMMON-NEXT: sub sp, sp, #112
106 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
107 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
108 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
109 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
110 ; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
111 ; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
112 ; CHECK-COMMON-NEXT: smstart sm
113 ; CHECK-COMMON-NEXT: smstop sm
114 ; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
115 ; CHECK-COMMON-NEXT: bl normal_callee
116 ; CHECK-COMMON-NEXT: str d0, [sp, #16] // 8-byte Folded Spill
117 ; CHECK-COMMON-NEXT: smstart sm
118 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
119 ; CHECK-COMMON-NEXT: fmov d0, x8
120 ; CHECK-COMMON-NEXT: ldr d1, [sp, #16] // 8-byte Folded Reload
121 ; CHECK-COMMON-NEXT: fadd d0, d1, d0
122 ; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
123 ; CHECK-COMMON-NEXT: smstop sm
124 ; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
125 ; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
126 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
127 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
128 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
129 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
130 ; CHECK-COMMON-NEXT: add sp, sp, #112
131 ; CHECK-COMMON-NEXT: ret
132 %call = call double @normal_callee(double %x);
133 %add = fadd double %call, 4.200000e+01
; A caller with no SME attributes calls @locally_streaming_caller_normal_callee,
; with "aarch64_pstate_sm_body" on the call site.
; Both RUN configurations expect a plain `bl` with no smstart/smstop in the
; caller. The FISEL and GISEL expectations differ only in how the constant
; operand of the fadd is materialised.
137 define double @normal_caller_to_locally_streaming_callee(double %x) nounwind noinline optnone {
138 ; CHECK-FISEL-LABEL: normal_caller_to_locally_streaming_callee:
139 ; CHECK-FISEL: // %bb.0:
140 ; CHECK-FISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
141 ; CHECK-FISEL-NEXT: bl locally_streaming_caller_normal_callee
142 ; CHECK-FISEL-NEXT: adrp x8, .LCPI3_0
143 ; CHECK-FISEL-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
144 ; CHECK-FISEL-NEXT: fadd d0, d0, d1
145 ; CHECK-FISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
146 ; CHECK-FISEL-NEXT: ret
148 ; CHECK-GISEL-LABEL: normal_caller_to_locally_streaming_callee:
149 ; CHECK-GISEL: // %bb.0:
150 ; CHECK-GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
151 ; CHECK-GISEL-NEXT: bl locally_streaming_caller_normal_callee
152 ; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
153 ; CHECK-GISEL-NEXT: fmov d1, x8
154 ; CHECK-GISEL-NEXT: fadd d0, d0, d1
155 ; CHECK-GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
156 ; CHECK-GISEL-NEXT: ret
157 %call = call double @locally_streaming_caller_normal_callee(double %x) "aarch64_pstate_sm_body";
158 %add = fadd double %call, 4.200000e+01
162 ; Check attribute in the call itself
; A caller marked "aarch64_pstate_sm_body" makes an indirect call through %p;
; the only streaming information about the callee is the
; "aarch64_pstate_sm_enabled" attribute on the call instruction.
; The expected output (shared by both RUN configurations) has exactly one
; `smstart sm` before the `blr x0` and one `smstop sm` after it.
164 define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noinline optnone "aarch64_pstate_sm_body" {
165 ; CHECK-COMMON-LABEL: locally_streaming_caller_streaming_callee_ptr:
166 ; CHECK-COMMON: // %bb.0:
167 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
168 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
169 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
170 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
171 ; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
172 ; CHECK-COMMON-NEXT: smstart sm
173 ; CHECK-COMMON-NEXT: blr x0
174 ; CHECK-COMMON-NEXT: smstop sm
175 ; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
176 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
177 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
178 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
179 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
180 ; CHECK-COMMON-NEXT: ret
181 call void %p() "aarch64_pstate_sm_enabled"
; A caller with no SME attributes makes an indirect call through %p, with
; "aarch64_pstate_sm_enabled" on the call instruction.
; The expected output (shared by both RUN configurations) brackets the
; `blr x0` with `smstart sm` / `smstop sm` and saves d8-d15 in the prologue.
185 define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optnone {
186 ; CHECK-COMMON-LABEL: normal_call_to_streaming_callee_ptr:
187 ; CHECK-COMMON: // %bb.0:
188 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
189 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
190 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
191 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
192 ; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
193 ; CHECK-COMMON-NEXT: smstart sm
194 ; CHECK-COMMON-NEXT: blr x0
195 ; CHECK-COMMON-NEXT: smstop sm
196 ; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
197 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
198 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
199 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
200 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
201 ; CHECK-COMMON-NEXT: ret
202 call void %p() "aarch64_pstate_sm_enabled"
; External callee declared with the "aarch64_pstate_za_shared" attribute.
210 declare double @za_shared_callee(double) "aarch64_pstate_za_shared"
; A caller marked "aarch64_pstate_za_new" calls @za_shared_callee.
; The expected output (shared by both RUN configurations) starts with a
; %prelude block that reads TPIDR2_EL0; when it is non-zero the %save.za block
; calls __arm_tpidr2_save and clears TPIDR2_EL0. The %entry block then runs
; `smstart za` and `zero {za}` before the call, and `smstop za` is expected
; before the epilogue.
212 define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_new"{
213 ; CHECK-COMMON-LABEL: za_new_caller_to_za_shared_callee:
214 ; CHECK-COMMON: // %bb.0: // %prelude
215 ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
216 ; CHECK-COMMON-NEXT: mov x29, sp
217 ; CHECK-COMMON-NEXT: sub sp, sp, #16
218 ; CHECK-COMMON-NEXT: rdsvl x8, #1
219 ; CHECK-COMMON-NEXT: mov x9, sp
220 ; CHECK-COMMON-NEXT: msub x8, x8, x8, x9
221 ; CHECK-COMMON-NEXT: mov sp, x8
222 ; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
223 ; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
224 ; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
225 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
226 ; CHECK-COMMON-NEXT: cbz x8, .LBB6_2
227 ; CHECK-COMMON-NEXT: b .LBB6_1
228 ; CHECK-COMMON-NEXT: .LBB6_1: // %save.za
229 ; CHECK-COMMON-NEXT: bl __arm_tpidr2_save
230 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
231 ; CHECK-COMMON-NEXT: b .LBB6_2
232 ; CHECK-COMMON-NEXT: .LBB6_2: // %entry
233 ; CHECK-COMMON-NEXT: smstart za
234 ; CHECK-COMMON-NEXT: zero {za}
235 ; CHECK-COMMON-NEXT: bl za_shared_callee
236 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
237 ; CHECK-COMMON-NEXT: fmov d1, x8
238 ; CHECK-COMMON-NEXT: fadd d0, d0, d1
239 ; CHECK-COMMON-NEXT: smstop za
240 ; CHECK-COMMON-NEXT: mov sp, x29
241 ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
242 ; CHECK-COMMON-NEXT: ret
244 %call = call double @za_shared_callee(double %x)
245 %add = fadd double %call, 4.200000e+01
; A caller marked "aarch64_pstate_za_shared" calls @normal_callee.
; The expected output (shared by both RUN configurations) fills in a 16-byte
; block at x29-16 and writes its address to TPIDR2_EL0 before the call. After
; the call it runs `smstart za`, re-reads TPIDR2_EL0, calls
; __arm_tpidr2_restore (with x0 = x29-16) only when the register reads as
; zero, and finally clears TPIDR2_EL0.
249 define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_shared"{
250 ; CHECK-COMMON-LABEL: za_shared_caller_to_za_none_callee:
251 ; CHECK-COMMON: // %bb.0: // %entry
252 ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
253 ; CHECK-COMMON-NEXT: mov x29, sp
254 ; CHECK-COMMON-NEXT: sub sp, sp, #16
255 ; CHECK-COMMON-NEXT: rdsvl x8, #1
256 ; CHECK-COMMON-NEXT: mov x9, sp
257 ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
258 ; CHECK-COMMON-NEXT: mov sp, x9
259 ; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
260 ; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
261 ; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
262 ; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
263 ; CHECK-COMMON-NEXT: sub x8, x29, #16
264 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
265 ; CHECK-COMMON-NEXT: bl normal_callee
266 ; CHECK-COMMON-NEXT: smstart za
267 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
268 ; CHECK-COMMON-NEXT: sub x0, x29, #16
269 ; CHECK-COMMON-NEXT: cbz x8, .LBB7_1
270 ; CHECK-COMMON-NEXT: b .LBB7_2
271 ; CHECK-COMMON-NEXT: .LBB7_1: // %entry
272 ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
273 ; CHECK-COMMON-NEXT: b .LBB7_2
274 ; CHECK-COMMON-NEXT: .LBB7_2: // %entry
275 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
276 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
277 ; CHECK-COMMON-NEXT: fmov d1, x8
278 ; CHECK-COMMON-NEXT: fadd d0, d0, d1
279 ; CHECK-COMMON-NEXT: mov sp, x29
280 ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
281 ; CHECK-COMMON-NEXT: ret
283 %call = call double @normal_callee(double %x)
284 %add = fadd double %call, 4.200000e+01
288 ; Ensure we set up and restore the lazy save correctly for instructions which are lowered to lib calls.
; The body is a single fp128 fadd in an "aarch64_pstate_za_shared" function;
; the expected output calls __addtf3 for it. Both RUN configurations expect
; TPIDR2_EL0 to be pointed at a block at x29-16 before the call. After the call
; they expect `smstart za`, then a call to __arm_tpidr2_restore that is skipped
; when TPIDR2_EL0 is still non-zero, then TPIDR2_EL0 cleared.
289 define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind {
290 ; CHECK-COMMON-LABEL: f128_call_za:
291 ; CHECK-COMMON: // %bb.0:
292 ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
293 ; CHECK-COMMON-NEXT: mov x29, sp
294 ; CHECK-COMMON-NEXT: sub sp, sp, #16
295 ; CHECK-COMMON-NEXT: rdsvl x8, #1
296 ; CHECK-COMMON-NEXT: mov x9, sp
297 ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
298 ; CHECK-COMMON-NEXT: mov sp, x9
299 ; CHECK-COMMON-NEXT: sub x10, x29, #16
300 ; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
301 ; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
302 ; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
303 ; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
304 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
305 ; CHECK-COMMON-NEXT: bl __addtf3
306 ; CHECK-COMMON-NEXT: smstart za
307 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
308 ; CHECK-COMMON-NEXT: sub x0, x29, #16
309 ; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2
310 ; CHECK-COMMON-NEXT: // %bb.1:
311 ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
312 ; CHECK-COMMON-NEXT: .LBB8_2:
313 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
314 ; CHECK-COMMON-NEXT: mov sp, x29
315 ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
316 ; CHECK-COMMON-NEXT: ret
317 %res = fadd fp128 %a, %b
322 ; Ensure we fall back to SelectionDAG isel here so that streaming mode is temporarily disabled around the libcall that the fadd is lowered to.
; The body is a single fp128 fadd in an "aarch64_pstate_sm_enabled" function.
; Both RUN configurations expect the __addtf3 call to be bracketed by
; `smstop sm` / `smstart sm`, with q0/q1 spilled before the mode switch and the
; q0 result spilled and reloaded across the switch back.
323 define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounwind {
324 ; CHECK-COMMON-LABEL: f128_call_sm:
325 ; CHECK-COMMON: // %bb.0:
326 ; CHECK-COMMON-NEXT: sub sp, sp, #112
327 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
328 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
329 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
330 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
331 ; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
332 ; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
333 ; CHECK-COMMON-NEXT: smstop sm
334 ; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
335 ; CHECK-COMMON-NEXT: bl __addtf3
336 ; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
337 ; CHECK-COMMON-NEXT: smstart sm
338 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
339 ; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
340 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
341 ; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
342 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
343 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
344 ; CHECK-COMMON-NEXT: add sp, sp, #112
345 ; CHECK-COMMON-NEXT: ret
346 %res = fadd fp128 %a, %b
350 ; As above this should use Selection DAG to make sure the libcall is lowered correctly.
; The body is a single double frem in an "aarch64_pstate_za_shared" function;
; the expected output calls fmod for it. Both RUN configurations expect
; TPIDR2_EL0 to be pointed at a block at x29-16 before the call. After the call
; they expect `smstart za`, then a call to __arm_tpidr2_restore that is skipped
; when TPIDR2_EL0 is still non-zero, then TPIDR2_EL0 cleared.
351 define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nounwind {
352 ; CHECK-COMMON-LABEL: frem_call_za:
353 ; CHECK-COMMON: // %bb.0:
354 ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
355 ; CHECK-COMMON-NEXT: mov x29, sp
356 ; CHECK-COMMON-NEXT: sub sp, sp, #16
357 ; CHECK-COMMON-NEXT: rdsvl x8, #1
358 ; CHECK-COMMON-NEXT: mov x9, sp
359 ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
360 ; CHECK-COMMON-NEXT: mov sp, x9
361 ; CHECK-COMMON-NEXT: sub x10, x29, #16
362 ; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
363 ; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
364 ; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
365 ; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
366 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
367 ; CHECK-COMMON-NEXT: bl fmod
368 ; CHECK-COMMON-NEXT: smstart za
369 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
370 ; CHECK-COMMON-NEXT: sub x0, x29, #16
371 ; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2
372 ; CHECK-COMMON-NEXT: // %bb.1:
373 ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
374 ; CHECK-COMMON-NEXT: .LBB10_2:
375 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
376 ; CHECK-COMMON-NEXT: mov sp, x29
377 ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
378 ; CHECK-COMMON-NEXT: ret
379 %res = frem double %a, %b
383 ; As above this should use Selection DAG to make sure the libcall is lowered correctly.
; The body is a single float frem in an "aarch64_pstate_sm_enabled" function.
; Both RUN configurations expect the fmodf call to be bracketed by
; `smstop sm` / `smstart sm`, with s0/s1 spilled before the mode switch and the
; s0 result spilled and reloaded across the switch back.
384 define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
385 ; CHECK-COMMON-LABEL: frem_call_sm:
386 ; CHECK-COMMON: // %bb.0:
387 ; CHECK-COMMON-NEXT: sub sp, sp, #96
388 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
389 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
390 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
391 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
392 ; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
393 ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
394 ; CHECK-COMMON-NEXT: smstop sm
395 ; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
396 ; CHECK-COMMON-NEXT: bl fmodf
397 ; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
398 ; CHECK-COMMON-NEXT: smstart sm
399 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
400 ; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
401 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
402 ; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
403 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
404 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
405 ; CHECK-COMMON-NEXT: add sp, sp, #96
406 ; CHECK-COMMON-NEXT: ret
407 %res = frem float %a, %b
411 ; As above this should use Selection DAG to make sure the libcall is lowered correctly.
; The body is a single float frem in an "aarch64_pstate_sm_compatible" function.
; Both RUN configurations expect a call to __arm_sme_state, with bit 0 of its
; x0 result kept in x19. The `smstop sm` before the fmodf call and the
; `smstart sm` after it are each skipped (tbz) when that bit is clear.
412 define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind {
413 ; CHECK-COMMON-LABEL: frem_call_sm_compat:
414 ; CHECK-COMMON: // %bb.0:
415 ; CHECK-COMMON-NEXT: sub sp, sp, #96
416 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
417 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
418 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
419 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
420 ; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
421 ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
422 ; CHECK-COMMON-NEXT: bl __arm_sme_state
423 ; CHECK-COMMON-NEXT: and x19, x0, #0x1
424 ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
425 ; CHECK-COMMON-NEXT: // %bb.1:
426 ; CHECK-COMMON-NEXT: smstop sm
427 ; CHECK-COMMON-NEXT: .LBB12_2:
428 ; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
429 ; CHECK-COMMON-NEXT: bl fmodf
430 ; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
431 ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_4
432 ; CHECK-COMMON-NEXT: // %bb.3:
433 ; CHECK-COMMON-NEXT: smstart sm
434 ; CHECK-COMMON-NEXT: .LBB12_4:
435 ; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
436 ; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
437 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
438 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
439 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
440 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
441 ; CHECK-COMMON-NEXT: add sp, sp, #96
442 ; CHECK-COMMON-NEXT: ret
443 %res = frem float %a, %b