1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
3 ; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FISEL
4 ; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
5 ; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL
; External callees shared by the call-lowering tests in this file.
; @streaming_callee is declared with "aarch64_pstate_sm_enabled";
; @normal_callee is declared without any SME attribute.
8 declare double @streaming_callee(double) "aarch64_pstate_sm_enabled"
9 declare double @normal_callee(double)
; A caller without any streaming attribute calls @streaming_callee.
; Expected output: the call is bracketed by "smstart sm" / "smstop sm", and the
; argument and the result in d0 are spilled to the stack and reloaded around
; each mode switch. d8-d15 and x30 are saved in the prologue, together with the
; value of "cntd x9".
; The FastISel and GlobalISel expectations differ only in how the 42.0 constant
; is materialised: a constant-pool load versus mov + fmov.
11 define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline optnone {
12 ; CHECK-FISEL-LABEL: nonstreaming_caller_streaming_callee:
13 ; CHECK-FISEL: // %bb.0: // %entry
14 ; CHECK-FISEL-NEXT: sub sp, sp, #96
15 ; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
16 ; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
17 ; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
18 ; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
19 ; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
20 ; CHECK-FISEL-NEXT: cntd x9
21 ; CHECK-FISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
22 ; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
23 ; CHECK-FISEL-NEXT: smstart sm
24 ; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
25 ; CHECK-FISEL-NEXT: bl streaming_callee
26 ; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
27 ; CHECK-FISEL-NEXT: smstop sm
28 ; CHECK-FISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
29 ; CHECK-FISEL-NEXT: adrp x8, .LCPI0_0
30 ; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI0_0]
31 ; CHECK-FISEL-NEXT: fadd d0, d1, d0
32 ; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
33 ; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
34 ; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
35 ; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
36 ; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
37 ; CHECK-FISEL-NEXT: add sp, sp, #96
38 ; CHECK-FISEL-NEXT: ret
40 ; CHECK-GISEL-LABEL: nonstreaming_caller_streaming_callee:
41 ; CHECK-GISEL: // %bb.0: // %entry
42 ; CHECK-GISEL-NEXT: sub sp, sp, #96
43 ; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
44 ; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
45 ; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
46 ; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
47 ; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
48 ; CHECK-GISEL-NEXT: cntd x9
49 ; CHECK-GISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
50 ; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
51 ; CHECK-GISEL-NEXT: smstart sm
52 ; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
53 ; CHECK-GISEL-NEXT: bl streaming_callee
54 ; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
55 ; CHECK-GISEL-NEXT: smstop sm
56 ; CHECK-GISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
57 ; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
58 ; CHECK-GISEL-NEXT: fmov d0, x8
59 ; CHECK-GISEL-NEXT: fadd d0, d1, d0
60 ; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
61 ; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
62 ; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
63 ; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
64 ; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
65 ; CHECK-GISEL-NEXT: add sp, sp, #96
66 ; CHECK-GISEL-NEXT: ret
; The call site repeats the callee's "aarch64_pstate_sm_enabled" attribute.
68 %call = call double @streaming_callee(double %x) "aarch64_pstate_sm_enabled"
; 4.2e+01 is the 42.0 constant whose bit pattern is 0x4045000000000000 above.
69 %add = fadd double %call, 4.200000e+01
; A caller marked "aarch64_pstate_sm_enabled" calls the attribute-free
; @normal_callee. Expected output (shared by both RUN lines): "smstop sm" before
; the call and "smstart sm" after it, with d0 spilled and reloaded around each
; mode switch.
74 define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
75 ; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
76 ; CHECK-COMMON: // %bb.0: // %entry
77 ; CHECK-COMMON-NEXT: sub sp, sp, #96
78 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
79 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
80 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
81 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
82 ; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
83 ; CHECK-COMMON-NEXT: cntd x9
84 ; CHECK-COMMON-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
85 ; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
86 ; CHECK-COMMON-NEXT: smstop sm
87 ; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
88 ; CHECK-COMMON-NEXT: bl normal_callee
89 ; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
90 ; CHECK-COMMON-NEXT: smstart sm
91 ; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
92 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
93 ; CHECK-COMMON-NEXT: fmov d0, x8
94 ; CHECK-COMMON-NEXT: fadd d0, d1, d0
95 ; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
96 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
97 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
98 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
99 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
100 ; CHECK-COMMON-NEXT: add sp, sp, #96
101 ; CHECK-COMMON-NEXT: ret
; No attribute on this call site: the callee is the plain @normal_callee.
103 %call = call double @normal_callee(double %x)
104 %add = fadd double %call, 4.200000e+01
; A function marked "aarch64_pstate_sm_body" calls the attribute-free
; @normal_callee. Expected output contains four mode switches:
;   smstart sm  - after the prologue,
;   smstop sm   - immediately before the call,
;   smstart sm  - immediately after the call,
;   smstop sm   - before the epilogue.
; d0 is spilled and reloaded around every switch. The prologue also stores
; "rdsvl x9, #1" shifted right by 3 and the value of "cntd x9" to the stack.
108 define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" {
109 ; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee:
110 ; CHECK-COMMON: // %bb.0:
111 ; CHECK-COMMON-NEXT: sub sp, sp, #128
112 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
113 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
114 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
115 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
116 ; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
117 ; CHECK-COMMON-NEXT: rdsvl x9, #1
118 ; CHECK-COMMON-NEXT: lsr x9, x9, #3
119 ; CHECK-COMMON-NEXT: str x9, [sp, #104] // 8-byte Folded Spill
120 ; CHECK-COMMON-NEXT: cntd x9
121 ; CHECK-COMMON-NEXT: str x9, [sp, #112] // 8-byte Folded Spill
122 ; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
123 ; CHECK-COMMON-NEXT: smstart sm
124 ; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
125 ; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
126 ; CHECK-COMMON-NEXT: smstop sm
127 ; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
128 ; CHECK-COMMON-NEXT: bl normal_callee
129 ; CHECK-COMMON-NEXT: str d0, [sp, #16] // 8-byte Folded Spill
130 ; CHECK-COMMON-NEXT: smstart sm
131 ; CHECK-COMMON-NEXT: ldr d1, [sp, #16] // 8-byte Folded Reload
132 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
133 ; CHECK-COMMON-NEXT: fmov d0, x8
134 ; CHECK-COMMON-NEXT: fadd d0, d1, d0
135 ; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
136 ; CHECK-COMMON-NEXT: smstop sm
137 ; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
138 ; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
139 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
140 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
141 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
142 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
143 ; CHECK-COMMON-NEXT: add sp, sp, #128
144 ; CHECK-COMMON-NEXT: ret
; The text after the closing parenthesis is a trailing IR comment (a lone ';').
145 %call = call double @normal_callee(double %x);
146 %add = fadd double %call, 4.200000e+01
; A caller without SME attributes calls @locally_streaming_caller_normal_callee,
; with "aarch64_pstate_sm_body" placed on the call site. Expected output is a
; plain "bl" with no smstart/smstop around it.
; The FastISel and GlobalISel expectations differ only in how the 42.0 constant
; is materialised: a constant-pool load versus mov + fmov.
150 define double @normal_caller_to_locally_streaming_callee(double %x) nounwind noinline optnone {
151 ; CHECK-FISEL-LABEL: normal_caller_to_locally_streaming_callee:
152 ; CHECK-FISEL: // %bb.0:
153 ; CHECK-FISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
154 ; CHECK-FISEL-NEXT: bl locally_streaming_caller_normal_callee
155 ; CHECK-FISEL-NEXT: adrp x8, .LCPI3_0
156 ; CHECK-FISEL-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
157 ; CHECK-FISEL-NEXT: fadd d0, d0, d1
158 ; CHECK-FISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
159 ; CHECK-FISEL-NEXT: ret
161 ; CHECK-GISEL-LABEL: normal_caller_to_locally_streaming_callee:
162 ; CHECK-GISEL: // %bb.0:
163 ; CHECK-GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
164 ; CHECK-GISEL-NEXT: bl locally_streaming_caller_normal_callee
165 ; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
166 ; CHECK-GISEL-NEXT: fmov d1, x8
167 ; CHECK-GISEL-NEXT: fadd d0, d0, d1
168 ; CHECK-GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
169 ; CHECK-GISEL-NEXT: ret
; The call site carries the same "aarch64_pstate_sm_body" attribute as the
; callee's definition; the trailing ';' starts an empty IR comment.
170 %call = call double @locally_streaming_caller_normal_callee(double %x) "aarch64_pstate_sm_body";
171 %add = fadd double %call, 4.200000e+01
175 ; Check the streaming attribute when it is given on the call instruction itself (indirect calls through a pointer).
; A function marked "aarch64_pstate_sm_body" makes an indirect call through %p,
; with "aarch64_pstate_sm_enabled" given only on the call site.
; Expected output: one "smstart sm" before "blr x0" and one "smstop sm" after
; it; there is no extra mode switch around the call itself.
177 define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noinline optnone "aarch64_pstate_sm_body" {
178 ; CHECK-COMMON-LABEL: locally_streaming_caller_streaming_callee_ptr:
179 ; CHECK-COMMON: // %bb.0:
180 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
181 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
182 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
183 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
184 ; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
185 ; CHECK-COMMON-NEXT: rdsvl x9, #1
186 ; CHECK-COMMON-NEXT: lsr x9, x9, #3
187 ; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
188 ; CHECK-COMMON-NEXT: cntd x9
189 ; CHECK-COMMON-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
190 ; CHECK-COMMON-NEXT: smstart sm
191 ; CHECK-COMMON-NEXT: blr x0
192 ; CHECK-COMMON-NEXT: smstop sm
193 ; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
194 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
195 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
196 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
197 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
198 ; CHECK-COMMON-NEXT: ret
; Indirect call: the streaming attribute exists only on this call site.
199 call void %p() "aarch64_pstate_sm_enabled"
; A caller without SME attributes makes an indirect call through %p, with
; "aarch64_pstate_sm_enabled" given only on the call site.
; Expected output: "smstart sm" / "blr x0" / "smstop sm", with d8-d15, x30 and
; the value of "cntd x9" saved in the prologue.
203 define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optnone {
204 ; CHECK-COMMON-LABEL: normal_call_to_streaming_callee_ptr:
205 ; CHECK-COMMON: // %bb.0:
206 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
207 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
208 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
209 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
210 ; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
211 ; CHECK-COMMON-NEXT: cntd x9
212 ; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
213 ; CHECK-COMMON-NEXT: smstart sm
214 ; CHECK-COMMON-NEXT: blr x0
215 ; CHECK-COMMON-NEXT: smstop sm
216 ; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
217 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
218 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
219 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
220 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
221 ; CHECK-COMMON-NEXT: ret
; Indirect call: the streaming attribute exists only on this call site.
222 call void %p() "aarch64_pstate_sm_enabled"
; External callee declared with "aarch64_inout_za"; called by
; @za_new_caller_to_za_shared_callee below.
230 declare double @za_shared_callee(double) "aarch64_inout_za"
; A function marked "aarch64_new_za" calls @za_shared_callee.
; Expected output: the prelude reads TPIDR2_EL0 and, when it is non-zero, calls
; __arm_tpidr2_save and then clears TPIDR2_EL0. The entry block then runs
; "smstart za" and "zero {za}" before the call, and "smstop za" before returning.
; NOTE(review): the "rdsvl x8, #1" result is overwritten by the next "mrs x8";
; presumably an artefact of unoptimised codegen - confirm.
232 define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_new_za"{
233 ; CHECK-COMMON-LABEL: za_new_caller_to_za_shared_callee:
234 ; CHECK-COMMON: // %bb.0: // %prelude
235 ; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
236 ; CHECK-COMMON-NEXT: rdsvl x8, #1
237 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
238 ; CHECK-COMMON-NEXT: cbz x8, .LBB6_2
239 ; CHECK-COMMON-NEXT: b .LBB6_1
240 ; CHECK-COMMON-NEXT: .LBB6_1: // %save.za
241 ; CHECK-COMMON-NEXT: bl __arm_tpidr2_save
242 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
243 ; CHECK-COMMON-NEXT: b .LBB6_2
244 ; CHECK-COMMON-NEXT: .LBB6_2: // %entry
245 ; CHECK-COMMON-NEXT: smstart za
246 ; CHECK-COMMON-NEXT: zero {za}
247 ; CHECK-COMMON-NEXT: bl za_shared_callee
248 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
249 ; CHECK-COMMON-NEXT: fmov d1, x8
250 ; CHECK-COMMON-NEXT: fadd d0, d0, d1
251 ; CHECK-COMMON-NEXT: smstop za
252 ; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
253 ; CHECK-COMMON-NEXT: ret
; No attribute on the call site; the ZA attribute comes from the declaration.
255 %call = call double @za_shared_callee(double %x)
256 %add = fadd double %call, 4.200000e+01
; A function marked "aarch64_inout_za" calls the attribute-free @normal_callee.
; Expected output, before the call:
;   - sp is lowered by x8*x8 bytes, where x8 is the result of "rdsvl x8, #1";
;   - a 16-byte block at x29-16 receives the new sp value, the low half-word of
;     x8, and zeroes in the remaining bytes;
;   - the address of that block is written to TPIDR2_EL0.
; After the call: "smstart za", then TPIDR2_EL0 is read and, when it is zero,
; __arm_tpidr2_restore is called with x0 pointing at the block. TPIDR2_EL0 is
; cleared afterwards on both paths.
260 define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_inout_za"{
261 ; CHECK-COMMON-LABEL: za_shared_caller_to_za_none_callee:
262 ; CHECK-COMMON: // %bb.0: // %entry
263 ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
264 ; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
265 ; CHECK-COMMON-NEXT: mov x29, sp
266 ; CHECK-COMMON-NEXT: sub sp, sp, #16
267 ; CHECK-COMMON-NEXT: rdsvl x8, #1
268 ; CHECK-COMMON-NEXT: mov x9, sp
269 ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
270 ; CHECK-COMMON-NEXT: mov sp, x9
271 ; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
272 ; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
273 ; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
274 ; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
275 ; CHECK-COMMON-NEXT: sub x8, x29, #16
276 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
277 ; CHECK-COMMON-NEXT: bl normal_callee
278 ; CHECK-COMMON-NEXT: smstart za
279 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
280 ; CHECK-COMMON-NEXT: sub x0, x29, #16
281 ; CHECK-COMMON-NEXT: cbz x8, .LBB7_1
282 ; CHECK-COMMON-NEXT: b .LBB7_2
283 ; CHECK-COMMON-NEXT: .LBB7_1: // %entry
284 ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
285 ; CHECK-COMMON-NEXT: b .LBB7_2
286 ; CHECK-COMMON-NEXT: .LBB7_2: // %entry
287 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
288 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
289 ; CHECK-COMMON-NEXT: fmov d1, x8
290 ; CHECK-COMMON-NEXT: fadd d0, d0, d1
291 ; CHECK-COMMON-NEXT: mov sp, x29
292 ; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
293 ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
294 ; CHECK-COMMON-NEXT: ret
; The callee is declared without any ZA attribute.
296 %call = call double @normal_callee(double %x)
297 %add = fadd double %call, 4.200000e+01
301 ; Ensure we set up and restore the lazy save correctly for instructions which are lowered to lib calls.
; An "aarch64_inout_za" function whose only operation is an fp128 fadd.
; Expected output calls __addtf3 for the addition. The TPIDR2_EL0 block at
; x29-16 is set up before that call; after it, "smstart za" is followed by a
; conditional __arm_tpidr2_restore, taken when TPIDR2_EL0 reads as zero.
; This function is not optnone, unlike the explicit-call tests earlier in the file.
302 define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
303 ; CHECK-COMMON-LABEL: f128_call_za:
304 ; CHECK-COMMON: // %bb.0:
305 ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
306 ; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
307 ; CHECK-COMMON-NEXT: mov x29, sp
308 ; CHECK-COMMON-NEXT: sub sp, sp, #16
309 ; CHECK-COMMON-NEXT: rdsvl x8, #1
310 ; CHECK-COMMON-NEXT: mov x9, sp
311 ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
312 ; CHECK-COMMON-NEXT: mov sp, x9
313 ; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
314 ; CHECK-COMMON-NEXT: sub x9, x29, #16
315 ; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
316 ; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
317 ; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
318 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
319 ; CHECK-COMMON-NEXT: bl __addtf3
320 ; CHECK-COMMON-NEXT: smstart za
321 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
322 ; CHECK-COMMON-NEXT: sub x0, x29, #16
323 ; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2
324 ; CHECK-COMMON-NEXT: // %bb.1:
325 ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
326 ; CHECK-COMMON-NEXT: .LBB8_2:
327 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
328 ; CHECK-COMMON-NEXT: mov sp, x29
329 ; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
330 ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
331 ; CHECK-COMMON-NEXT: ret
; This fadd is the operation that appears as "bl __addtf3" above.
332 %res = fadd fp128 %a, %b
337 ; Ensure we fall back to SelectionDAG isel here so that we temporarily disable streaming mode to lower the fadd (with function calls).
; An "aarch64_pstate_sm_enabled" function whose only operation is an fp128 fadd.
; Expected output: "smstop sm" before the call to __addtf3 and "smstart sm"
; after it. q0/q1 are spilled and reloaded around the first switch, and the q0
; result around the second.
338 define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounwind {
339 ; CHECK-COMMON-LABEL: f128_call_sm:
340 ; CHECK-COMMON: // %bb.0:
341 ; CHECK-COMMON-NEXT: sub sp, sp, #112
342 ; CHECK-COMMON-NEXT: cntd x9
343 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
344 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
345 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
346 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
347 ; CHECK-COMMON-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill
348 ; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
349 ; CHECK-COMMON-NEXT: smstop sm
350 ; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
351 ; CHECK-COMMON-NEXT: bl __addtf3
352 ; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
353 ; CHECK-COMMON-NEXT: smstart sm
354 ; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
355 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
356 ; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
357 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
358 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
359 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
360 ; CHECK-COMMON-NEXT: add sp, sp, #112
361 ; CHECK-COMMON-NEXT: ret
; This fadd is the operation that appears as "bl __addtf3" above.
362 %res = fadd fp128 %a, %b
366 ; As above, this should use SelectionDAG to make sure the libcall is lowered correctly.
; An "aarch64_inout_za" function whose only operation is a double frem.
; Expected output calls fmod for the remainder. The TPIDR2_EL0 block at x29-16
; is set up before that call; after it, "smstart za" is followed by a
; conditional __arm_tpidr2_restore, taken when TPIDR2_EL0 reads as zero.
367 define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
368 ; CHECK-COMMON-LABEL: frem_call_za:
369 ; CHECK-COMMON: // %bb.0:
370 ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
371 ; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
372 ; CHECK-COMMON-NEXT: mov x29, sp
373 ; CHECK-COMMON-NEXT: sub sp, sp, #16
374 ; CHECK-COMMON-NEXT: rdsvl x8, #1
375 ; CHECK-COMMON-NEXT: mov x9, sp
376 ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
377 ; CHECK-COMMON-NEXT: mov sp, x9
378 ; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
379 ; CHECK-COMMON-NEXT: sub x9, x29, #16
380 ; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
381 ; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
382 ; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
383 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
384 ; CHECK-COMMON-NEXT: bl fmod
385 ; CHECK-COMMON-NEXT: smstart za
386 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
387 ; CHECK-COMMON-NEXT: sub x0, x29, #16
388 ; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2
389 ; CHECK-COMMON-NEXT: // %bb.1:
390 ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
391 ; CHECK-COMMON-NEXT: .LBB10_2:
392 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
393 ; CHECK-COMMON-NEXT: mov sp, x29
394 ; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
395 ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
396 ; CHECK-COMMON-NEXT: ret
; This frem is the operation that appears as "bl fmod" above.
397 %res = frem double %a, %b
401 ; As above, this should use SelectionDAG to make sure the libcall is lowered correctly.
; An "aarch64_pstate_sm_enabled" function whose only operation is a float frem.
; Expected output: "smstop sm" before the call to fmodf and "smstart sm" after
; it. s0/s1 are spilled and reloaded around the first switch, and the s0 result
; around the second.
402 define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
403 ; CHECK-COMMON-LABEL: frem_call_sm:
404 ; CHECK-COMMON: // %bb.0:
405 ; CHECK-COMMON-NEXT: sub sp, sp, #96
406 ; CHECK-COMMON-NEXT: cntd x9
407 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
408 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
409 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
410 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
411 ; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
412 ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
413 ; CHECK-COMMON-NEXT: smstop sm
414 ; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
415 ; CHECK-COMMON-NEXT: bl fmodf
416 ; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
417 ; CHECK-COMMON-NEXT: smstart sm
418 ; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
419 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
420 ; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
421 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
422 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
423 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
424 ; CHECK-COMMON-NEXT: add sp, sp, #96
425 ; CHECK-COMMON-NEXT: ret
; This frem is the operation that appears as "bl fmodf" above.
426 %res = frem float %a, %b
430 ; As above, this should use SelectionDAG to make sure the libcall is lowered correctly.
; An "aarch64_pstate_sm_compatible" function whose only operation is a float frem.
; Expected output: __arm_sme_state is called and bit 0 of its x0 result is kept
; in x19. "smstop sm" before the fmodf call and "smstart sm" after it are each
; skipped when that bit is clear (tbz w19, #0). s0/s1 are spilled before the
; state query and reloaded just before the fmodf call.
431 define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind {
432 ; CHECK-COMMON-LABEL: frem_call_sm_compat:
433 ; CHECK-COMMON: // %bb.0:
434 ; CHECK-COMMON-NEXT: sub sp, sp, #112
435 ; CHECK-COMMON-NEXT: cntd x9
436 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
437 ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
438 ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
439 ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
440 ; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
441 ; CHECK-COMMON-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
442 ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
443 ; CHECK-COMMON-NEXT: bl __arm_sme_state
444 ; CHECK-COMMON-NEXT: and x19, x0, #0x1
445 ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
446 ; CHECK-COMMON-NEXT: // %bb.1:
447 ; CHECK-COMMON-NEXT: smstop sm
448 ; CHECK-COMMON-NEXT: .LBB12_2:
449 ; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
450 ; CHECK-COMMON-NEXT: bl fmodf
451 ; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
452 ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_4
453 ; CHECK-COMMON-NEXT: // %bb.3:
454 ; CHECK-COMMON-NEXT: smstart sm
455 ; CHECK-COMMON-NEXT: .LBB12_4:
456 ; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
457 ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
458 ; CHECK-COMMON-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
459 ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
460 ; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
461 ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
462 ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
463 ; CHECK-COMMON-NEXT: add sp, sp, #112
464 ; CHECK-COMMON-NEXT: ret
; This frem is the operation that appears as "bl fmodf" above.
465 %res = frem float %a, %b
; External callee declared with "aarch64_inout_zt0"; called by
; @zt0_new_caller_to_zt0_shared_callee below.
473 declare double @zt0_shared_callee(double) "aarch64_inout_zt0"
; A function marked "aarch64_new_zt0" calls @zt0_shared_callee.
; Expected output: the prelude reads TPIDR2_EL0 and, when it is non-zero, stores
; zt0 to [sp], calls __arm_tpidr2_save, reloads zt0 and clears TPIDR2_EL0. The
; entry block then runs "smstart za" and "zero { zt0 }" before the call, and
; "smstop za" before returning.
475 define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline optnone "aarch64_new_zt0" {
476 ; CHECK-COMMON-LABEL: zt0_new_caller_to_zt0_shared_callee:
477 ; CHECK-COMMON: // %bb.0: // %prelude
478 ; CHECK-COMMON-NEXT: sub sp, sp, #80
479 ; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
480 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
481 ; CHECK-COMMON-NEXT: cbz x8, .LBB13_2
482 ; CHECK-COMMON-NEXT: b .LBB13_1
483 ; CHECK-COMMON-NEXT: .LBB13_1: // %save.za
484 ; CHECK-COMMON-NEXT: mov x8, sp
485 ; CHECK-COMMON-NEXT: str zt0, [x8]
486 ; CHECK-COMMON-NEXT: bl __arm_tpidr2_save
487 ; CHECK-COMMON-NEXT: ldr zt0, [x8]
488 ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
489 ; CHECK-COMMON-NEXT: b .LBB13_2
490 ; CHECK-COMMON-NEXT: .LBB13_2: // %entry
491 ; CHECK-COMMON-NEXT: smstart za
492 ; CHECK-COMMON-NEXT: zero { zt0 }
493 ; CHECK-COMMON-NEXT: bl zt0_shared_callee
494 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
495 ; CHECK-COMMON-NEXT: fmov d1, x8
496 ; CHECK-COMMON-NEXT: fadd d0, d0, d1
497 ; CHECK-COMMON-NEXT: smstop za
498 ; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
499 ; CHECK-COMMON-NEXT: add sp, sp, #80
500 ; CHECK-COMMON-NEXT: ret
; No attribute on the call site; the ZT0 attribute comes from the declaration.
502 %call = call double @zt0_shared_callee(double %x)
503 %add = fadd double %call, 4.200000e+01
; A function marked "aarch64_inout_zt0" calls the attribute-free @normal_callee.
; Expected output: zt0 is stored to the stack through x19 (set to sp), then
; "smstop za" precedes the call. After the call, "smstart za" is followed by a
; reload of zt0 from the same address.
507 define double @zt0_shared_caller_to_normal_callee(double %x) nounwind noinline optnone "aarch64_inout_zt0" {
508 ; CHECK-COMMON-LABEL: zt0_shared_caller_to_normal_callee:
509 ; CHECK-COMMON: // %bb.0: // %entry
510 ; CHECK-COMMON-NEXT: sub sp, sp, #80
511 ; CHECK-COMMON-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
512 ; CHECK-COMMON-NEXT: mov x19, sp
513 ; CHECK-COMMON-NEXT: str zt0, [x19]
514 ; CHECK-COMMON-NEXT: smstop za
515 ; CHECK-COMMON-NEXT: bl normal_callee
516 ; CHECK-COMMON-NEXT: smstart za
517 ; CHECK-COMMON-NEXT: ldr zt0, [x19]
518 ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
519 ; CHECK-COMMON-NEXT: fmov d1, x8
520 ; CHECK-COMMON-NEXT: fadd d0, d0, d1
521 ; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
522 ; CHECK-COMMON-NEXT: add sp, sp, #80
523 ; CHECK-COMMON-NEXT: ret
; The callee is declared without any ZT0 attribute.
525 %call = call double @normal_callee(double %x)
526 %add = fadd double %call, 4.200000e+01