1 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=FP-CHECK
3 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -frame-pointer=non-leaf -verify-machineinstrs < %s | FileCheck %s --check-prefix=NO-SVE-CHECK
4 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -verify-machineinstrs -enable-machine-outliner < %s | FileCheck %s --check-prefix=OUTLINER-CHECK
; External functions called by the tests in this file. The first three carry no
; attribute group on their declarations.
6 declare void @callee();
7 declare void @fixed_callee(<4 x i32>);
8 declare void @scalable_callee(<vscale x 2 x i64>);
; These two reference attribute group #0, whose definition is outside this view
; (presumably the streaming attribute -- confirm against the attribute list).
10 declare void @streaming_callee() #0;
11 declare void @streaming_callee_with_arg(i32) #0;
13 ; Simple example of a function with one call requiring a streaming mode change
15 define void @vg_unwind_simple() #0 {
; Default-prefix expectations (no frame pointer requested): x9 is stored next to
; x30 at [sp, #64] in an 80-byte frame, and that slot (CFA-8) is described as vg
; from just before smstop until just after smstart around the call to callee.
16 ; CHECK-LABEL: vg_unwind_simple:
18 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
19 ; CHECK-NEXT: .cfi_def_cfa_offset 80
21 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
22 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
23 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
24 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
25 ; CHECK-NEXT: .cfi_offset w30, -16
26 ; CHECK-NEXT: .cfi_offset b8, -24
27 ; CHECK-NEXT: .cfi_offset b9, -32
28 ; CHECK-NEXT: .cfi_offset b10, -40
29 ; CHECK-NEXT: .cfi_offset b11, -48
30 ; CHECK-NEXT: .cfi_offset b12, -56
31 ; CHECK-NEXT: .cfi_offset b13, -64
32 ; CHECK-NEXT: .cfi_offset b14, -72
33 ; CHECK-NEXT: .cfi_offset b15, -80
34 ; CHECK-NEXT: .cfi_offset vg, -8
35 ; CHECK-NEXT: smstop sm
36 ; CHECK-NEXT: bl callee
37 ; CHECK-NEXT: smstart sm
38 ; CHECK-NEXT: .cfi_restore vg
39 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
40 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
41 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
42 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
43 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
44 ; CHECK-NEXT: .cfi_def_cfa_offset 0
45 ; CHECK-NEXT: .cfi_restore w30
46 ; CHECK-NEXT: .cfi_restore b8
47 ; CHECK-NEXT: .cfi_restore b9
48 ; CHECK-NEXT: .cfi_restore b10
49 ; CHECK-NEXT: .cfi_restore b11
50 ; CHECK-NEXT: .cfi_restore b12
51 ; CHECK-NEXT: .cfi_restore b13
52 ; CHECK-NEXT: .cfi_restore b14
53 ; CHECK-NEXT: .cfi_restore b15
; Expectations for the -frame-pointer=non-leaf invocation (FP-CHECK prefix):
; x29/x30 are saved as a pair, the cntd result gets its own slot at [sp, #80]
; (CFA-16 in the 96-byte frame), and the CFA is expressed via w29 while the
; frame is live.
56 ; FP-CHECK-LABEL: vg_unwind_simple:
58 ; FP-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
59 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 96
60 ; FP-CHECK-NEXT: cntd x9
61 ; FP-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
62 ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
63 ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
64 ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
65 ; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
66 ; FP-CHECK-NEXT: add x29, sp, #64
67 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32
68 ; FP-CHECK-NEXT: .cfi_offset w30, -24
69 ; FP-CHECK-NEXT: .cfi_offset w29, -32
70 ; FP-CHECK-NEXT: .cfi_offset b8, -40
71 ; FP-CHECK-NEXT: .cfi_offset b9, -48
72 ; FP-CHECK-NEXT: .cfi_offset b10, -56
73 ; FP-CHECK-NEXT: .cfi_offset b11, -64
74 ; FP-CHECK-NEXT: .cfi_offset b12, -72
75 ; FP-CHECK-NEXT: .cfi_offset b13, -80
76 ; FP-CHECK-NEXT: .cfi_offset b14, -88
77 ; FP-CHECK-NEXT: .cfi_offset b15, -96
78 ; FP-CHECK-NEXT: .cfi_offset vg, -16
79 ; FP-CHECK-NEXT: smstop sm
80 ; FP-CHECK-NEXT: bl callee
81 ; FP-CHECK-NEXT: smstart sm
82 ; FP-CHECK-NEXT: .cfi_restore vg
83 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96
84 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
85 ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
86 ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
87 ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
88 ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
89 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0
90 ; FP-CHECK-NEXT: .cfi_restore w30
91 ; FP-CHECK-NEXT: .cfi_restore w29
92 ; FP-CHECK-NEXT: .cfi_restore b8
93 ; FP-CHECK-NEXT: .cfi_restore b9
94 ; FP-CHECK-NEXT: .cfi_restore b10
95 ; FP-CHECK-NEXT: .cfi_restore b11
96 ; FP-CHECK-NEXT: .cfi_restore b12
97 ; FP-CHECK-NEXT: .cfi_restore b13
98 ; FP-CHECK-NEXT: .cfi_restore b14
99 ; FP-CHECK-NEXT: .cfi_restore b15
; For the -enable-machine-outliner invocation, the output following this
; function's label must not contain the string OUTLINED_FUNCTION_.
102 ; OUTLINER-CHECK-LABEL: vg_unwind_simple:
103 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
109 ; As above, with an extra register clobbered by the inline asm call which
110 ; changes NeedsGapToAlignStack to false
112 define void @vg_unwind_needs_gap() #0 {
; Default-prefix expectations: x20 is spilled to its own slot at [sp, #80] in a
; 96-byte frame, and vg is described at CFA-24, between x30 (CFA-32) and
; x20 (CFA-16).
113 ; CHECK-LABEL: vg_unwind_needs_gap:
115 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
116 ; CHECK-NEXT: .cfi_def_cfa_offset 96
117 ; CHECK-NEXT: cntd x9
118 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
119 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
120 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
121 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
122 ; CHECK-NEXT: str x20, [sp, #80] // 8-byte Folded Spill
123 ; CHECK-NEXT: .cfi_offset w20, -16
124 ; CHECK-NEXT: .cfi_offset w30, -32
125 ; CHECK-NEXT: .cfi_offset b8, -40
126 ; CHECK-NEXT: .cfi_offset b9, -48
127 ; CHECK-NEXT: .cfi_offset b10, -56
128 ; CHECK-NEXT: .cfi_offset b11, -64
129 ; CHECK-NEXT: .cfi_offset b12, -72
130 ; CHECK-NEXT: .cfi_offset b13, -80
131 ; CHECK-NEXT: .cfi_offset b14, -88
132 ; CHECK-NEXT: .cfi_offset b15, -96
134 ; CHECK-NEXT: //NO_APP
135 ; CHECK-NEXT: .cfi_offset vg, -24
136 ; CHECK-NEXT: smstop sm
137 ; CHECK-NEXT: bl callee
138 ; CHECK-NEXT: smstart sm
139 ; CHECK-NEXT: .cfi_restore vg
140 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
141 ; CHECK-NEXT: ldr x20, [sp, #80] // 8-byte Folded Reload
142 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
143 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
144 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
145 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
146 ; CHECK-NEXT: .cfi_def_cfa_offset 0
147 ; CHECK-NEXT: .cfi_restore w20
148 ; CHECK-NEXT: .cfi_restore w30
149 ; CHECK-NEXT: .cfi_restore b8
150 ; CHECK-NEXT: .cfi_restore b9
151 ; CHECK-NEXT: .cfi_restore b10
152 ; CHECK-NEXT: .cfi_restore b11
153 ; CHECK-NEXT: .cfi_restore b12
154 ; CHECK-NEXT: .cfi_restore b13
155 ; CHECK-NEXT: .cfi_restore b14
156 ; CHECK-NEXT: .cfi_restore b15
; Expectations for the -frame-pointer=non-leaf invocation: x9 and x20 share one
; stp at [sp, #80], which puts vg at CFA-16 and x20 at CFA-8; only x20 is
; reloaded from that pair in the epilogue.
159 ; FP-CHECK-LABEL: vg_unwind_needs_gap:
160 ; FP-CHECK: // %bb.0:
161 ; FP-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
162 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 96
163 ; FP-CHECK-NEXT: cntd x9
164 ; FP-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
165 ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
166 ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
167 ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
168 ; FP-CHECK-NEXT: stp x9, x20, [sp, #80] // 16-byte Folded Spill
169 ; FP-CHECK-NEXT: add x29, sp, #64
170 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32
171 ; FP-CHECK-NEXT: .cfi_offset w20, -8
172 ; FP-CHECK-NEXT: .cfi_offset w30, -24
173 ; FP-CHECK-NEXT: .cfi_offset w29, -32
174 ; FP-CHECK-NEXT: .cfi_offset b8, -40
175 ; FP-CHECK-NEXT: .cfi_offset b9, -48
176 ; FP-CHECK-NEXT: .cfi_offset b10, -56
177 ; FP-CHECK-NEXT: .cfi_offset b11, -64
178 ; FP-CHECK-NEXT: .cfi_offset b12, -72
179 ; FP-CHECK-NEXT: .cfi_offset b13, -80
180 ; FP-CHECK-NEXT: .cfi_offset b14, -88
181 ; FP-CHECK-NEXT: .cfi_offset b15, -96
182 ; FP-CHECK-NEXT: //APP
183 ; FP-CHECK-NEXT: //NO_APP
184 ; FP-CHECK-NEXT: .cfi_offset vg, -16
185 ; FP-CHECK-NEXT: smstop sm
186 ; FP-CHECK-NEXT: bl callee
187 ; FP-CHECK-NEXT: smstart sm
188 ; FP-CHECK-NEXT: .cfi_restore vg
189 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96
190 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
191 ; FP-CHECK-NEXT: ldr x20, [sp, #88] // 8-byte Folded Reload
192 ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
193 ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
194 ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
195 ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
196 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0
197 ; FP-CHECK-NEXT: .cfi_restore w20
198 ; FP-CHECK-NEXT: .cfi_restore w30
199 ; FP-CHECK-NEXT: .cfi_restore w29
200 ; FP-CHECK-NEXT: .cfi_restore b8
201 ; FP-CHECK-NEXT: .cfi_restore b9
202 ; FP-CHECK-NEXT: .cfi_restore b10
203 ; FP-CHECK-NEXT: .cfi_restore b11
204 ; FP-CHECK-NEXT: .cfi_restore b12
205 ; FP-CHECK-NEXT: .cfi_restore b13
206 ; FP-CHECK-NEXT: .cfi_restore b14
207 ; FP-CHECK-NEXT: .cfi_restore b15
; For the -enable-machine-outliner invocation, the output following this
; function's label must not contain the string OUTLINED_FUNCTION_.
210 ; OUTLINER-CHECK-LABEL: vg_unwind_needs_gap:
211 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
; Empty inline asm whose only constraint marks x20 as clobbered, forcing the
; extra callee-save seen in the expectations above.
213 call void asm sideeffect "", "~{x20}"()
; A caller that forwards a fixed-width <4 x i32> argument to @fixed_callee.
; In both checked configurations the expected code spills q0 to [sp] before
; smstop and reloads it after smstop, immediately before the call.
218 define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 {
; Default-prefix expectations: a 96-byte frame whose lowest 16 bytes hold the q0
; spill; x9 is stored beside x30 at [sp, #80], so vg is described at CFA-8.
219 ; CHECK-LABEL: vg_unwind_with_fixed_args:
221 ; CHECK-NEXT: sub sp, sp, #96
222 ; CHECK-NEXT: .cfi_def_cfa_offset 96
223 ; CHECK-NEXT: cntd x9
224 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
225 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
226 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
227 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
228 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
229 ; CHECK-NEXT: .cfi_offset w30, -16
230 ; CHECK-NEXT: .cfi_offset b8, -24
231 ; CHECK-NEXT: .cfi_offset b9, -32
232 ; CHECK-NEXT: .cfi_offset b10, -40
233 ; CHECK-NEXT: .cfi_offset b11, -48
234 ; CHECK-NEXT: .cfi_offset b12, -56
235 ; CHECK-NEXT: .cfi_offset b13, -64
236 ; CHECK-NEXT: .cfi_offset b14, -72
237 ; CHECK-NEXT: .cfi_offset b15, -80
238 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
239 ; CHECK-NEXT: .cfi_offset vg, -8
240 ; CHECK-NEXT: smstop sm
241 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
242 ; CHECK-NEXT: bl fixed_callee
243 ; CHECK-NEXT: smstart sm
244 ; CHECK-NEXT: .cfi_restore vg
245 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
246 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
247 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
248 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
249 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
250 ; CHECK-NEXT: add sp, sp, #96
251 ; CHECK-NEXT: .cfi_def_cfa_offset 0
252 ; CHECK-NEXT: .cfi_restore w30
253 ; CHECK-NEXT: .cfi_restore b8
254 ; CHECK-NEXT: .cfi_restore b9
255 ; CHECK-NEXT: .cfi_restore b10
256 ; CHECK-NEXT: .cfi_restore b11
257 ; CHECK-NEXT: .cfi_restore b12
258 ; CHECK-NEXT: .cfi_restore b13
259 ; CHECK-NEXT: .cfi_restore b14
260 ; CHECK-NEXT: .cfi_restore b15
; Expectations for the -frame-pointer=non-leaf invocation: the frame is 112
; bytes, x29/x30 are saved as a pair at [sp, #80], and x9 is stored alone at
; [sp, #96], so vg is described at CFA-16.
263 ; FP-CHECK-LABEL: vg_unwind_with_fixed_args:
264 ; FP-CHECK: // %bb.0:
265 ; FP-CHECK-NEXT: sub sp, sp, #112
266 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 112
267 ; FP-CHECK-NEXT: cntd x9
268 ; FP-CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
269 ; FP-CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
270 ; FP-CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
271 ; FP-CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
272 ; FP-CHECK-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill
273 ; FP-CHECK-NEXT: str x9, [sp, #96] // 8-byte Folded Spill
274 ; FP-CHECK-NEXT: add x29, sp, #80
275 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32
276 ; FP-CHECK-NEXT: .cfi_offset w30, -24
277 ; FP-CHECK-NEXT: .cfi_offset w29, -32
278 ; FP-CHECK-NEXT: .cfi_offset b8, -40
279 ; FP-CHECK-NEXT: .cfi_offset b9, -48
280 ; FP-CHECK-NEXT: .cfi_offset b10, -56
281 ; FP-CHECK-NEXT: .cfi_offset b11, -64
282 ; FP-CHECK-NEXT: .cfi_offset b12, -72
283 ; FP-CHECK-NEXT: .cfi_offset b13, -80
284 ; FP-CHECK-NEXT: .cfi_offset b14, -88
285 ; FP-CHECK-NEXT: .cfi_offset b15, -96
286 ; FP-CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
287 ; FP-CHECK-NEXT: .cfi_offset vg, -16
288 ; FP-CHECK-NEXT: smstop sm
289 ; FP-CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
290 ; FP-CHECK-NEXT: bl fixed_callee
291 ; FP-CHECK-NEXT: smstart sm
292 ; FP-CHECK-NEXT: .cfi_restore vg
293 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 112
294 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload
295 ; FP-CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
296 ; FP-CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
297 ; FP-CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
298 ; FP-CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
299 ; FP-CHECK-NEXT: add sp, sp, #112
300 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0
301 ; FP-CHECK-NEXT: .cfi_restore w30
302 ; FP-CHECK-NEXT: .cfi_restore w29
303 ; FP-CHECK-NEXT: .cfi_restore b8
304 ; FP-CHECK-NEXT: .cfi_restore b9
305 ; FP-CHECK-NEXT: .cfi_restore b10
306 ; FP-CHECK-NEXT: .cfi_restore b11
307 ; FP-CHECK-NEXT: .cfi_restore b12
308 ; FP-CHECK-NEXT: .cfi_restore b13
309 ; FP-CHECK-NEXT: .cfi_restore b14
310 ; FP-CHECK-NEXT: .cfi_restore b15
; For the -enable-machine-outliner invocation, the output following this
; function's label must not contain the string OUTLINED_FUNCTION_.
313 ; OUTLINER-CHECK-LABEL: vg_unwind_with_fixed_args:
314 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
; The call whose expected lowering is wrapped in smstop/smstart above.
316 call void @fixed_callee(<4 x i32> %x);
; A caller that forwards a scalable <vscale x 2 x i64> argument to
; @scalable_callee and additionally clobbers x28 through inline asm. The
; expected code saves z8-z23 and p4-p15 in an 18-vector area, spills z0 to one
; further vector slot before smstop, and reloads it before the call.
320 define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
; Default-prefix expectations: x9 is paired with x28 at [sp, #16] in the 32-byte
; fixed area, so vg is described at CFA-16. While the scalable area is
; allocated the CFA is given by .cfi_escape expressions that, per their
; trailing comments, are sp-relative and scaled by VG.
321 ; CHECK-LABEL: vg_unwind_with_sve_args:
323 ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
324 ; CHECK-NEXT: .cfi_def_cfa_offset 32
325 ; CHECK-NEXT: cntd x9
326 ; CHECK-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill
327 ; CHECK-NEXT: .cfi_offset w28, -8
328 ; CHECK-NEXT: .cfi_offset w30, -24
329 ; CHECK-NEXT: .cfi_offset w29, -32
330 ; CHECK-NEXT: addvl sp, sp, #-18
331 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
332 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
333 ; CHECK-NEXT: ptrue pn8.b
334 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
335 ; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
336 ; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
337 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
338 ; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
339 ; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
340 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
341 ; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
342 ; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
343 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
344 ; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
345 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
346 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
347 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
348 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
349 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
350 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
351 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
352 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
353 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
354 ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
355 ; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
356 ; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
357 ; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG
358 ; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 32 - 40 * VG
359 ; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
360 ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
361 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
362 ; CHECK-NEXT: addvl sp, sp, #-1
363 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 152 * VG
364 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
366 ; CHECK-NEXT: //NO_APP
367 ; CHECK-NEXT: .cfi_offset vg, -16
368 ; CHECK-NEXT: smstop sm
369 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
370 ; CHECK-NEXT: bl scalable_callee
371 ; CHECK-NEXT: smstart sm
372 ; CHECK-NEXT: .cfi_restore vg
373 ; CHECK-NEXT: addvl sp, sp, #1
374 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
375 ; CHECK-NEXT: ptrue pn8.b
376 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
377 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
378 ; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
379 ; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
380 ; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
381 ; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
382 ; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
383 ; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
384 ; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
385 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
386 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
387 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
388 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
389 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
390 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
391 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
392 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
393 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
394 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
395 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
396 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
397 ; CHECK-NEXT: addvl sp, sp, #18
398 ; CHECK-NEXT: .cfi_def_cfa wsp, 32
399 ; CHECK-NEXT: .cfi_restore z8
400 ; CHECK-NEXT: .cfi_restore z9
401 ; CHECK-NEXT: .cfi_restore z10
402 ; CHECK-NEXT: .cfi_restore z11
403 ; CHECK-NEXT: .cfi_restore z12
404 ; CHECK-NEXT: .cfi_restore z13
405 ; CHECK-NEXT: .cfi_restore z14
406 ; CHECK-NEXT: .cfi_restore z15
407 ; CHECK-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload
408 ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
409 ; CHECK-NEXT: .cfi_def_cfa_offset 0
410 ; CHECK-NEXT: .cfi_restore w28
411 ; CHECK-NEXT: .cfi_restore w30
412 ; CHECK-NEXT: .cfi_restore w29
; Expectations for the -frame-pointer=non-leaf invocation: the fixed area is 48
; bytes with x9 stored alone at [sp, #16] (vg at CFA-32) and x28/x27 paired at
; [sp, #32]. The CFA is defined as w29 + 48, no sp-relative CFA escape follows
; the addvl instructions, and the z0 spill slot is addressed from x29.
415 ; FP-CHECK-LABEL: vg_unwind_with_sve_args:
416 ; FP-CHECK: // %bb.0:
417 ; FP-CHECK-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
418 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 48
419 ; FP-CHECK-NEXT: cntd x9
420 ; FP-CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
421 ; FP-CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
422 ; FP-CHECK-NEXT: mov x29, sp
423 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 48
424 ; FP-CHECK-NEXT: .cfi_offset w27, -8
425 ; FP-CHECK-NEXT: .cfi_offset w28, -16
426 ; FP-CHECK-NEXT: .cfi_offset w30, -40
427 ; FP-CHECK-NEXT: .cfi_offset w29, -48
428 ; FP-CHECK-NEXT: addvl sp, sp, #-18
429 ; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
430 ; FP-CHECK-NEXT: ptrue pn8.b
431 ; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
432 ; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
433 ; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
434 ; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
435 ; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
436 ; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
437 ; FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
438 ; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
439 ; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
440 ; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
441 ; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
442 ; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
443 ; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
444 ; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
445 ; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
446 ; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
447 ; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
448 ; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
449 ; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
450 ; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
451 ; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
452 ; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
453 ; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
454 ; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG
455 ; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG
456 ; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG
457 ; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG
458 ; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG
459 ; FP-CHECK-NEXT: addvl sp, sp, #-1
460 ; FP-CHECK-NEXT: str z0, [x29, #-19, mul vl] // 16-byte Folded Spill
461 ; FP-CHECK-NEXT: //APP
462 ; FP-CHECK-NEXT: //NO_APP
463 ; FP-CHECK-NEXT: .cfi_offset vg, -32
464 ; FP-CHECK-NEXT: smstop sm
465 ; FP-CHECK-NEXT: ldr z0, [x29, #-19, mul vl] // 16-byte Folded Reload
466 ; FP-CHECK-NEXT: bl scalable_callee
467 ; FP-CHECK-NEXT: smstart sm
468 ; FP-CHECK-NEXT: .cfi_restore vg
469 ; FP-CHECK-NEXT: addvl sp, sp, #1
470 ; FP-CHECK-NEXT: ptrue pn8.b
471 ; FP-CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
472 ; FP-CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
473 ; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
474 ; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
475 ; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
476 ; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
477 ; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
478 ; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
479 ; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
480 ; FP-CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
481 ; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
482 ; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
483 ; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
484 ; FP-CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
485 ; FP-CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
486 ; FP-CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
487 ; FP-CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
488 ; FP-CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
489 ; FP-CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
490 ; FP-CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
491 ; FP-CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
492 ; FP-CHECK-NEXT: addvl sp, sp, #18
493 ; FP-CHECK-NEXT: .cfi_restore z8
494 ; FP-CHECK-NEXT: .cfi_restore z9
495 ; FP-CHECK-NEXT: .cfi_restore z10
496 ; FP-CHECK-NEXT: .cfi_restore z11
497 ; FP-CHECK-NEXT: .cfi_restore z12
498 ; FP-CHECK-NEXT: .cfi_restore z13
499 ; FP-CHECK-NEXT: .cfi_restore z14
500 ; FP-CHECK-NEXT: .cfi_restore z15
501 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 48
502 ; FP-CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
503 ; FP-CHECK-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload
504 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0
505 ; FP-CHECK-NEXT: .cfi_restore w27
506 ; FP-CHECK-NEXT: .cfi_restore w28
507 ; FP-CHECK-NEXT: .cfi_restore w30
508 ; FP-CHECK-NEXT: .cfi_restore w29
; For the -enable-machine-outliner invocation, the output following this
; function's label must not contain the string OUTLINED_FUNCTION_.
511 ; OUTLINER-CHECK-LABEL: vg_unwind_with_sve_args:
512 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
; Empty inline asm that only marks x28 as clobbered, followed by the call whose
; expected lowering is wrapped in smstop/smstart above.
514 call void asm sideeffect "", "~{x28}"()
515 call void @scalable_callee(<vscale x 2 x i64> %x);
519 ; This test was based on stack-probing-64k.ll and tries to test multiple uses of
520 ; findScratchNonCalleeSaveRegister.
522 define void @vg_unwind_multiple_scratch_regs(ptr %out) #1 {
; NOTE(review): attribute group #1 is defined outside this view; the probing
; loop expected below suggests it enables stack probing -- confirm.
;
; Default-prefix expectations: x9 first holds the cntd result, which is saved at
; [sp, #80] (vg at CFA-16), and is then reused as the probe-loop bound
; (sp - 327680). While the loop at .LBB4_1 lowers sp in 4096-byte steps the CFA
; is tracked through w9 and is switched back to wsp once the loop exits.
523 ; CHECK-LABEL: vg_unwind_multiple_scratch_regs:
524 ; CHECK: // %bb.0: // %entry
525 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
526 ; CHECK-NEXT: .cfi_def_cfa_offset 96
527 ; CHECK-NEXT: cntd x9
528 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
529 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
530 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
531 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
532 ; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
533 ; CHECK-NEXT: .cfi_offset w30, -24
534 ; CHECK-NEXT: .cfi_offset w29, -32
535 ; CHECK-NEXT: .cfi_offset b8, -40
536 ; CHECK-NEXT: .cfi_offset b9, -48
537 ; CHECK-NEXT: .cfi_offset b10, -56
538 ; CHECK-NEXT: .cfi_offset b11, -64
539 ; CHECK-NEXT: .cfi_offset b12, -72
540 ; CHECK-NEXT: .cfi_offset b13, -80
541 ; CHECK-NEXT: .cfi_offset b14, -88
542 ; CHECK-NEXT: .cfi_offset b15, -96
543 ; CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680
544 ; CHECK-NEXT: .cfi_def_cfa w9, 327776
545 ; CHECK-NEXT: .LBB4_1: // %entry
546 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
547 ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
548 ; CHECK-NEXT: cmp sp, x9
549 ; CHECK-NEXT: str xzr, [sp]
550 ; CHECK-NEXT: b.ne .LBB4_1
551 ; CHECK-NEXT: // %bb.2: // %entry
552 ; CHECK-NEXT: .cfi_def_cfa_register wsp
553 ; CHECK-NEXT: mov x8, sp
554 ; CHECK-NEXT: str x8, [x0]
555 ; CHECK-NEXT: .cfi_offset vg, -16
556 ; CHECK-NEXT: smstop sm
557 ; CHECK-NEXT: bl callee
558 ; CHECK-NEXT: smstart sm
559 ; CHECK-NEXT: .cfi_restore vg
560 ; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680
561 ; CHECK-NEXT: .cfi_def_cfa_offset 96
562 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
563 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
564 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
565 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
566 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
567 ; CHECK-NEXT: .cfi_def_cfa_offset 0
568 ; CHECK-NEXT: .cfi_restore w30
569 ; CHECK-NEXT: .cfi_restore w29
570 ; CHECK-NEXT: .cfi_restore b8
571 ; CHECK-NEXT: .cfi_restore b9
572 ; CHECK-NEXT: .cfi_restore b10
573 ; CHECK-NEXT: .cfi_restore b11
574 ; CHECK-NEXT: .cfi_restore b12
575 ; CHECK-NEXT: .cfi_restore b13
576 ; CHECK-NEXT: .cfi_restore b14
577 ; CHECK-NEXT: .cfi_restore b15
; Expectations for the -frame-pointer=non-leaf invocation: x9 is paired with x28
; at [sp, #80] (vg at CFA-16, x28 at CFA-8), the CFA is defined via w29 before
; the probe loop, and no w9-based CFA directive appears around the loop.
580 ; FP-CHECK-LABEL: vg_unwind_multiple_scratch_regs:
581 ; FP-CHECK: // %bb.0: // %entry
582 ; FP-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
583 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 96
584 ; FP-CHECK-NEXT: cntd x9
585 ; FP-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
586 ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
587 ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
588 ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
589 ; FP-CHECK-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill
590 ; FP-CHECK-NEXT: add x29, sp, #64
591 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32
592 ; FP-CHECK-NEXT: .cfi_offset w28, -8
593 ; FP-CHECK-NEXT: .cfi_offset w30, -24
594 ; FP-CHECK-NEXT: .cfi_offset w29, -32
595 ; FP-CHECK-NEXT: .cfi_offset b8, -40
596 ; FP-CHECK-NEXT: .cfi_offset b9, -48
597 ; FP-CHECK-NEXT: .cfi_offset b10, -56
598 ; FP-CHECK-NEXT: .cfi_offset b11, -64
599 ; FP-CHECK-NEXT: .cfi_offset b12, -72
600 ; FP-CHECK-NEXT: .cfi_offset b13, -80
601 ; FP-CHECK-NEXT: .cfi_offset b14, -88
602 ; FP-CHECK-NEXT: .cfi_offset b15, -96
603 ; FP-CHECK-NEXT: sub x9, sp, #80, lsl #12 // =327680
604 ; FP-CHECK-NEXT: .LBB4_1: // %entry
605 ; FP-CHECK-NEXT: // =>This Inner Loop Header: Depth=1
606 ; FP-CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
607 ; FP-CHECK-NEXT: cmp sp, x9
608 ; FP-CHECK-NEXT: str xzr, [sp]
609 ; FP-CHECK-NEXT: b.ne .LBB4_1
610 ; FP-CHECK-NEXT: // %bb.2: // %entry
611 ; FP-CHECK-NEXT: mov x8, sp
612 ; FP-CHECK-NEXT: str x8, [x0]
613 ; FP-CHECK-NEXT: .cfi_offset vg, -16
614 ; FP-CHECK-NEXT: smstop sm
615 ; FP-CHECK-NEXT: bl callee
616 ; FP-CHECK-NEXT: smstart sm
617 ; FP-CHECK-NEXT: .cfi_restore vg
618 ; FP-CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680
619 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96
620 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
621 ; FP-CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload
622 ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
623 ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
624 ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
625 ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
626 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0
627 ; FP-CHECK-NEXT: .cfi_restore w28
628 ; FP-CHECK-NEXT: .cfi_restore w30
629 ; FP-CHECK-NEXT: .cfi_restore w29
630 ; FP-CHECK-NEXT: .cfi_restore b8
631 ; FP-CHECK-NEXT: .cfi_restore b9
632 ; FP-CHECK-NEXT: .cfi_restore b10
633 ; FP-CHECK-NEXT: .cfi_restore b11
634 ; FP-CHECK-NEXT: .cfi_restore b12
635 ; FP-CHECK-NEXT: .cfi_restore b13
636 ; FP-CHECK-NEXT: .cfi_restore b14
637 ; FP-CHECK-NEXT: .cfi_restore b15
; For the -enable-machine-outliner invocation, the output following this
; function's label must not contain the string OUTLINED_FUNCTION_.
640 ; OUTLINER-CHECK-LABEL: vg_unwind_multiple_scratch_regs:
641 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
; A 327680-byte stack object (80 * 4096, matching the probe-loop bound above)
; whose address is stored through %out -- presumably so the allocation cannot be
; optimized away.
644 %v = alloca i8, i64 327680, align 1
645 store ptr %v, ptr %out, align 8
650 ; Locally streaming functions require storing both the streaming and
651 ; non-streaming values of VG.
; This function uses attribute group #3 ("aarch64_pstate_sm_body"). The expected
; prologue spills two VG values: x9 computed with `rdsvl x9, #1` followed by
; `lsr x9, x9, #3`, and x9 read again with `cntd x9`. In the body, `.cfi_restore vg`
; follows `smstart sm` and a second `.cfi_offset vg` precedes `smstop sm`.
653 define void @vg_locally_streaming_fn() #3 {
654 ; CHECK-LABEL: vg_locally_streaming_fn:
656 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
657 ; CHECK-NEXT: .cfi_def_cfa_offset 96
658 ; CHECK-NEXT: rdsvl x9, #1
659 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
660 ; CHECK-NEXT: lsr x9, x9, #3
661 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
662 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
663 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
664 ; CHECK-NEXT: cntd x9
665 ; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
666 ; CHECK-NEXT: .cfi_offset vg, -16
667 ; CHECK-NEXT: .cfi_offset w30, -32
668 ; CHECK-NEXT: .cfi_offset b8, -40
669 ; CHECK-NEXT: .cfi_offset b9, -48
670 ; CHECK-NEXT: .cfi_offset b10, -56
671 ; CHECK-NEXT: .cfi_offset b11, -64
672 ; CHECK-NEXT: .cfi_offset b12, -72
673 ; CHECK-NEXT: .cfi_offset b13, -80
674 ; CHECK-NEXT: .cfi_offset b14, -88
675 ; CHECK-NEXT: .cfi_offset b15, -96
676 ; CHECK-NEXT: bl callee
677 ; CHECK-NEXT: smstart sm
678 ; CHECK-NEXT: .cfi_restore vg
679 ; CHECK-NEXT: bl streaming_callee
680 ; CHECK-NEXT: .cfi_offset vg, -24
681 ; CHECK-NEXT: smstop sm
682 ; CHECK-NEXT: bl callee
683 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
684 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
685 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
686 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
687 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
688 ; CHECK-NEXT: .cfi_def_cfa_offset 0
689 ; CHECK-NEXT: .cfi_restore w30
690 ; CHECK-NEXT: .cfi_restore b8
691 ; CHECK-NEXT: .cfi_restore b9
692 ; CHECK-NEXT: .cfi_restore b10
693 ; CHECK-NEXT: .cfi_restore b11
694 ; CHECK-NEXT: .cfi_restore b12
695 ; CHECK-NEXT: .cfi_restore b13
696 ; CHECK-NEXT: .cfi_restore b14
697 ; CHECK-NEXT: .cfi_restore b15
; Expectations for the -frame-pointer=non-leaf run: x29 is saved alongside x30,
; the CFA is defined relative to w29, and the two VG slots are at [sp, #80] and
; [sp, #88].
700 ; FP-CHECK-LABEL: vg_locally_streaming_fn:
701 ; FP-CHECK: // %bb.0:
702 ; FP-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
703 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 96
704 ; FP-CHECK-NEXT: rdsvl x9, #1
705 ; FP-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
706 ; FP-CHECK-NEXT: lsr x9, x9, #3
707 ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
708 ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
709 ; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
710 ; FP-CHECK-NEXT: cntd x9
711 ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
712 ; FP-CHECK-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
713 ; FP-CHECK-NEXT: add x29, sp, #64
714 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32
715 ; FP-CHECK-NEXT: .cfi_offset vg, -8
716 ; FP-CHECK-NEXT: .cfi_offset w30, -24
717 ; FP-CHECK-NEXT: .cfi_offset w29, -32
718 ; FP-CHECK-NEXT: .cfi_offset b8, -40
719 ; FP-CHECK-NEXT: .cfi_offset b9, -48
720 ; FP-CHECK-NEXT: .cfi_offset b10, -56
721 ; FP-CHECK-NEXT: .cfi_offset b11, -64
722 ; FP-CHECK-NEXT: .cfi_offset b12, -72
723 ; FP-CHECK-NEXT: .cfi_offset b13, -80
724 ; FP-CHECK-NEXT: .cfi_offset b14, -88
725 ; FP-CHECK-NEXT: .cfi_offset b15, -96
726 ; FP-CHECK-NEXT: bl callee
727 ; FP-CHECK-NEXT: smstart sm
728 ; FP-CHECK-NEXT: .cfi_restore vg
729 ; FP-CHECK-NEXT: bl streaming_callee
730 ; FP-CHECK-NEXT: .cfi_offset vg, -16
731 ; FP-CHECK-NEXT: smstop sm
732 ; FP-CHECK-NEXT: bl callee
733 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96
734 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
735 ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
736 ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
737 ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
738 ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
739 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0
740 ; FP-CHECK-NEXT: .cfi_restore w30
741 ; FP-CHECK-NEXT: .cfi_restore w29
742 ; FP-CHECK-NEXT: .cfi_restore b8
743 ; FP-CHECK-NEXT: .cfi_restore b9
744 ; FP-CHECK-NEXT: .cfi_restore b10
745 ; FP-CHECK-NEXT: .cfi_restore b11
746 ; FP-CHECK-NEXT: .cfi_restore b12
747 ; FP-CHECK-NEXT: .cfi_restore b13
748 ; FP-CHECK-NEXT: .cfi_restore b14
749 ; FP-CHECK-NEXT: .cfi_restore b15
; With -enable-machine-outliner, the text OUTLINED_FUNCTION_ must not appear in
; the output for this function.
752 ; OUTLINER-CHECK-LABEL: vg_locally_streaming_fn:
753 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
756 call void @streaming_callee()
; A streaming-compatible caller (attribute group #4) calling @streaming_callee.
; The expected code calls __arm_sme_state, keeps bit 0 of the result in x19, and
; uses `tbnz w19, #0` to skip `smstart sm` before the call and `smstop sm` after
; it when that bit is set. `.cfi_offset vg` is emitted before the conditional
; mode change and `.cfi_restore vg` after the mode is switched back.
761 define void @streaming_compatible_to_streaming() #4 {
762 ; CHECK-LABEL: streaming_compatible_to_streaming:
764 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
765 ; CHECK-NEXT: .cfi_def_cfa_offset 96
766 ; CHECK-NEXT: cntd x9
767 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
768 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
769 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
770 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
771 ; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
772 ; CHECK-NEXT: .cfi_offset w19, -16
773 ; CHECK-NEXT: .cfi_offset w30, -32
774 ; CHECK-NEXT: .cfi_offset b8, -40
775 ; CHECK-NEXT: .cfi_offset b9, -48
776 ; CHECK-NEXT: .cfi_offset b10, -56
777 ; CHECK-NEXT: .cfi_offset b11, -64
778 ; CHECK-NEXT: .cfi_offset b12, -72
779 ; CHECK-NEXT: .cfi_offset b13, -80
780 ; CHECK-NEXT: .cfi_offset b14, -88
781 ; CHECK-NEXT: .cfi_offset b15, -96
782 ; CHECK-NEXT: bl __arm_sme_state
783 ; CHECK-NEXT: and x19, x0, #0x1
784 ; CHECK-NEXT: .cfi_offset vg, -24
785 ; CHECK-NEXT: tbnz w19, #0, .LBB6_2
786 ; CHECK-NEXT: // %bb.1:
787 ; CHECK-NEXT: smstart sm
788 ; CHECK-NEXT: .LBB6_2:
789 ; CHECK-NEXT: bl streaming_callee
790 ; CHECK-NEXT: tbnz w19, #0, .LBB6_4
791 ; CHECK-NEXT: // %bb.3:
792 ; CHECK-NEXT: smstop sm
793 ; CHECK-NEXT: .LBB6_4:
794 ; CHECK-NEXT: .cfi_restore vg
795 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
796 ; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
797 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
798 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
799 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
800 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
801 ; CHECK-NEXT: .cfi_def_cfa_offset 0
802 ; CHECK-NEXT: .cfi_restore w19
803 ; CHECK-NEXT: .cfi_restore w30
804 ; CHECK-NEXT: .cfi_restore b8
805 ; CHECK-NEXT: .cfi_restore b9
806 ; CHECK-NEXT: .cfi_restore b10
807 ; CHECK-NEXT: .cfi_restore b11
808 ; CHECK-NEXT: .cfi_restore b12
809 ; CHECK-NEXT: .cfi_restore b13
810 ; CHECK-NEXT: .cfi_restore b14
811 ; CHECK-NEXT: .cfi_restore b15
; Expectations for the -frame-pointer=non-leaf run: x29/x30 are saved as a pair,
; the VG value in x9 shares a store with x19, and the CFA is defined relative to
; w29.
814 ; FP-CHECK-LABEL: streaming_compatible_to_streaming:
815 ; FP-CHECK: // %bb.0:
816 ; FP-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
817 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 96
818 ; FP-CHECK-NEXT: cntd x9
819 ; FP-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
820 ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
821 ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
822 ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
823 ; FP-CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
824 ; FP-CHECK-NEXT: add x29, sp, #64
825 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32
826 ; FP-CHECK-NEXT: .cfi_offset w19, -8
827 ; FP-CHECK-NEXT: .cfi_offset w30, -24
828 ; FP-CHECK-NEXT: .cfi_offset w29, -32
829 ; FP-CHECK-NEXT: .cfi_offset b8, -40
830 ; FP-CHECK-NEXT: .cfi_offset b9, -48
831 ; FP-CHECK-NEXT: .cfi_offset b10, -56
832 ; FP-CHECK-NEXT: .cfi_offset b11, -64
833 ; FP-CHECK-NEXT: .cfi_offset b12, -72
834 ; FP-CHECK-NEXT: .cfi_offset b13, -80
835 ; FP-CHECK-NEXT: .cfi_offset b14, -88
836 ; FP-CHECK-NEXT: .cfi_offset b15, -96
837 ; FP-CHECK-NEXT: bl __arm_sme_state
838 ; FP-CHECK-NEXT: and x19, x0, #0x1
839 ; FP-CHECK-NEXT: .cfi_offset vg, -16
840 ; FP-CHECK-NEXT: tbnz w19, #0, .LBB6_2
841 ; FP-CHECK-NEXT: // %bb.1:
842 ; FP-CHECK-NEXT: smstart sm
843 ; FP-CHECK-NEXT: .LBB6_2:
844 ; FP-CHECK-NEXT: bl streaming_callee
845 ; FP-CHECK-NEXT: tbnz w19, #0, .LBB6_4
846 ; FP-CHECK-NEXT: // %bb.3:
847 ; FP-CHECK-NEXT: smstop sm
848 ; FP-CHECK-NEXT: .LBB6_4:
849 ; FP-CHECK-NEXT: .cfi_restore vg
850 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96
851 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
852 ; FP-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
853 ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
854 ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
855 ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
856 ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
857 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0
858 ; FP-CHECK-NEXT: .cfi_restore w19
859 ; FP-CHECK-NEXT: .cfi_restore w30
860 ; FP-CHECK-NEXT: .cfi_restore w29
861 ; FP-CHECK-NEXT: .cfi_restore b8
862 ; FP-CHECK-NEXT: .cfi_restore b9
863 ; FP-CHECK-NEXT: .cfi_restore b10
864 ; FP-CHECK-NEXT: .cfi_restore b11
865 ; FP-CHECK-NEXT: .cfi_restore b12
866 ; FP-CHECK-NEXT: .cfi_restore b13
867 ; FP-CHECK-NEXT: .cfi_restore b14
868 ; FP-CHECK-NEXT: .cfi_restore b15
; With -enable-machine-outliner, the text OUTLINED_FUNCTION_ must not appear in
; the output for this function.
871 ; OUTLINER-CHECK-LABEL: streaming_compatible_to_streaming:
872 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
874 call void @streaming_callee()
; A streaming-compatible caller (attribute group #4) whose expected code calls
; `callee`. The mode change is the mirror image of the streaming-callee case:
; bit 0 of the __arm_sme_state result is kept in x19, and `tbz w19, #0` skips
; `smstop sm` before the call and `smstart sm` after it when that bit is clear.
878 define void @streaming_compatible_to_non_streaming() #4 {
879 ; CHECK-LABEL: streaming_compatible_to_non_streaming:
881 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
882 ; CHECK-NEXT: .cfi_def_cfa_offset 96
883 ; CHECK-NEXT: cntd x9
884 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
885 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
886 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
887 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
888 ; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
889 ; CHECK-NEXT: .cfi_offset w19, -16
890 ; CHECK-NEXT: .cfi_offset w30, -32
891 ; CHECK-NEXT: .cfi_offset b8, -40
892 ; CHECK-NEXT: .cfi_offset b9, -48
893 ; CHECK-NEXT: .cfi_offset b10, -56
894 ; CHECK-NEXT: .cfi_offset b11, -64
895 ; CHECK-NEXT: .cfi_offset b12, -72
896 ; CHECK-NEXT: .cfi_offset b13, -80
897 ; CHECK-NEXT: .cfi_offset b14, -88
898 ; CHECK-NEXT: .cfi_offset b15, -96
899 ; CHECK-NEXT: bl __arm_sme_state
900 ; CHECK-NEXT: and x19, x0, #0x1
901 ; CHECK-NEXT: .cfi_offset vg, -24
902 ; CHECK-NEXT: tbz w19, #0, .LBB7_2
903 ; CHECK-NEXT: // %bb.1:
904 ; CHECK-NEXT: smstop sm
905 ; CHECK-NEXT: .LBB7_2:
906 ; CHECK-NEXT: bl callee
907 ; CHECK-NEXT: tbz w19, #0, .LBB7_4
908 ; CHECK-NEXT: // %bb.3:
909 ; CHECK-NEXT: smstart sm
910 ; CHECK-NEXT: .LBB7_4:
911 ; CHECK-NEXT: .cfi_restore vg
912 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
913 ; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
914 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
915 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
916 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
917 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
918 ; CHECK-NEXT: .cfi_def_cfa_offset 0
919 ; CHECK-NEXT: .cfi_restore w19
920 ; CHECK-NEXT: .cfi_restore w30
921 ; CHECK-NEXT: .cfi_restore b8
922 ; CHECK-NEXT: .cfi_restore b9
923 ; CHECK-NEXT: .cfi_restore b10
924 ; CHECK-NEXT: .cfi_restore b11
925 ; CHECK-NEXT: .cfi_restore b12
926 ; CHECK-NEXT: .cfi_restore b13
927 ; CHECK-NEXT: .cfi_restore b14
928 ; CHECK-NEXT: .cfi_restore b15
; Expectations for the -frame-pointer=non-leaf run: x29/x30 are saved as a pair,
; the VG value in x9 shares a store with x19, and the CFA is defined relative to
; w29.
931 ; FP-CHECK-LABEL: streaming_compatible_to_non_streaming:
932 ; FP-CHECK: // %bb.0:
933 ; FP-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
934 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 96
935 ; FP-CHECK-NEXT: cntd x9
936 ; FP-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
937 ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
938 ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
939 ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
940 ; FP-CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
941 ; FP-CHECK-NEXT: add x29, sp, #64
942 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32
943 ; FP-CHECK-NEXT: .cfi_offset w19, -8
944 ; FP-CHECK-NEXT: .cfi_offset w30, -24
945 ; FP-CHECK-NEXT: .cfi_offset w29, -32
946 ; FP-CHECK-NEXT: .cfi_offset b8, -40
947 ; FP-CHECK-NEXT: .cfi_offset b9, -48
948 ; FP-CHECK-NEXT: .cfi_offset b10, -56
949 ; FP-CHECK-NEXT: .cfi_offset b11, -64
950 ; FP-CHECK-NEXT: .cfi_offset b12, -72
951 ; FP-CHECK-NEXT: .cfi_offset b13, -80
952 ; FP-CHECK-NEXT: .cfi_offset b14, -88
953 ; FP-CHECK-NEXT: .cfi_offset b15, -96
954 ; FP-CHECK-NEXT: bl __arm_sme_state
955 ; FP-CHECK-NEXT: and x19, x0, #0x1
956 ; FP-CHECK-NEXT: .cfi_offset vg, -16
957 ; FP-CHECK-NEXT: tbz w19, #0, .LBB7_2
958 ; FP-CHECK-NEXT: // %bb.1:
959 ; FP-CHECK-NEXT: smstop sm
960 ; FP-CHECK-NEXT: .LBB7_2:
961 ; FP-CHECK-NEXT: bl callee
962 ; FP-CHECK-NEXT: tbz w19, #0, .LBB7_4
963 ; FP-CHECK-NEXT: // %bb.3:
964 ; FP-CHECK-NEXT: smstart sm
965 ; FP-CHECK-NEXT: .LBB7_4:
966 ; FP-CHECK-NEXT: .cfi_restore vg
967 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96
968 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
969 ; FP-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
970 ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
971 ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
972 ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
973 ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
974 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0
975 ; FP-CHECK-NEXT: .cfi_restore w19
976 ; FP-CHECK-NEXT: .cfi_restore w30
977 ; FP-CHECK-NEXT: .cfi_restore w29
978 ; FP-CHECK-NEXT: .cfi_restore b8
979 ; FP-CHECK-NEXT: .cfi_restore b9
980 ; FP-CHECK-NEXT: .cfi_restore b10
981 ; FP-CHECK-NEXT: .cfi_restore b11
982 ; FP-CHECK-NEXT: .cfi_restore b12
983 ; FP-CHECK-NEXT: .cfi_restore b13
984 ; FP-CHECK-NEXT: .cfi_restore b14
985 ; FP-CHECK-NEXT: .cfi_restore b15
; With -enable-machine-outliner, the text OUTLINED_FUNCTION_ must not appear in
; the output for this function.
988 ; OUTLINER-CHECK-LABEL: streaming_compatible_to_non_streaming:
989 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
995 ; If the target does not have SVE, do not emit cntd in the prologue and
996 ; instead spill the result returned by __arm_get_current_vg.
997 ; This requires preserving the argument %x, as the vg value is returned in x0.
; A streaming-compatible caller (attribute group #4) passing %x on to
; @streaming_callee_with_arg. In the expected code the incoming x0 is copied to
; x9 before `bl __arm_get_current_vg` and copied back afterwards; the x0 value
; returned by that call is what gets stored (paired with x19) at [sp, #80].
1000 define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
1001 ; NO-SVE-CHECK-LABEL: streaming_compatible_no_sve:
1002 ; NO-SVE-CHECK: // %bb.0:
1003 ; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
1004 ; NO-SVE-CHECK-NEXT: .cfi_def_cfa_offset 96
1005 ; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
1006 ; NO-SVE-CHECK-NEXT: mov x9, x0
1007 ; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
1008 ; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
1009 ; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
1010 ; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg
1011 ; NO-SVE-CHECK-NEXT: stp x0, x19, [sp, #80] // 16-byte Folded Spill
1012 ; NO-SVE-CHECK-NEXT: mov x0, x9
1013 ; NO-SVE-CHECK-NEXT: add x29, sp, #64
1014 ; NO-SVE-CHECK-NEXT: .cfi_def_cfa w29, 32
1015 ; NO-SVE-CHECK-NEXT: .cfi_offset w19, -8
1016 ; NO-SVE-CHECK-NEXT: .cfi_offset w30, -24
1017 ; NO-SVE-CHECK-NEXT: .cfi_offset w29, -32
1018 ; NO-SVE-CHECK-NEXT: .cfi_offset b8, -40
1019 ; NO-SVE-CHECK-NEXT: .cfi_offset b9, -48
1020 ; NO-SVE-CHECK-NEXT: .cfi_offset b10, -56
1021 ; NO-SVE-CHECK-NEXT: .cfi_offset b11, -64
1022 ; NO-SVE-CHECK-NEXT: .cfi_offset b12, -72
1023 ; NO-SVE-CHECK-NEXT: .cfi_offset b13, -80
1024 ; NO-SVE-CHECK-NEXT: .cfi_offset b14, -88
1025 ; NO-SVE-CHECK-NEXT: .cfi_offset b15, -96
1026 ; NO-SVE-CHECK-NEXT: mov w8, w0
1027 ; NO-SVE-CHECK-NEXT: bl __arm_sme_state
1028 ; NO-SVE-CHECK-NEXT: and x19, x0, #0x1
1029 ; NO-SVE-CHECK-NEXT: .cfi_offset vg, -16
1030 ; NO-SVE-CHECK-NEXT: tbnz w19, #0, .LBB8_2
1031 ; NO-SVE-CHECK-NEXT: // %bb.1:
1032 ; NO-SVE-CHECK-NEXT: smstart sm
1033 ; NO-SVE-CHECK-NEXT: .LBB8_2:
1034 ; NO-SVE-CHECK-NEXT: mov w0, w8
1035 ; NO-SVE-CHECK-NEXT: bl streaming_callee_with_arg
1036 ; NO-SVE-CHECK-NEXT: tbnz w19, #0, .LBB8_4
1037 ; NO-SVE-CHECK-NEXT: // %bb.3:
1038 ; NO-SVE-CHECK-NEXT: smstop sm
1039 ; NO-SVE-CHECK-NEXT: .LBB8_4:
1040 ; NO-SVE-CHECK-NEXT: .cfi_restore vg
1041 ; NO-SVE-CHECK-NEXT: .cfi_def_cfa wsp, 96
1042 ; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
1043 ; NO-SVE-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
1044 ; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1045 ; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1046 ; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1047 ; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
1048 ; NO-SVE-CHECK-NEXT: .cfi_def_cfa_offset 0
1049 ; NO-SVE-CHECK-NEXT: .cfi_restore w19
1050 ; NO-SVE-CHECK-NEXT: .cfi_restore w30
1051 ; NO-SVE-CHECK-NEXT: .cfi_restore w29
1052 ; NO-SVE-CHECK-NEXT: .cfi_restore b8
1053 ; NO-SVE-CHECK-NEXT: .cfi_restore b9
1054 ; NO-SVE-CHECK-NEXT: .cfi_restore b10
1055 ; NO-SVE-CHECK-NEXT: .cfi_restore b11
1056 ; NO-SVE-CHECK-NEXT: .cfi_restore b12
1057 ; NO-SVE-CHECK-NEXT: .cfi_restore b13
1058 ; NO-SVE-CHECK-NEXT: .cfi_restore b14
1059 ; NO-SVE-CHECK-NEXT: .cfi_restore b15
1060 ; NO-SVE-CHECK-NEXT: ret
; With -enable-machine-outliner, the text OUTLINED_FUNCTION_ must not appear in
; the output for this function.
1062 ; OUTLINER-CHECK-LABEL: streaming_compatible_no_sve:
1063 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
1065 call void @streaming_callee_with_arg(i32 %x)
1069 ; The algorithm that fixes up the offsets of the callee-save/restore
1070 ; instructions must skip over the instructions that instantiate the current
1071 ; 'VG' value. We must make sure that it doesn't treat any RDSVL in
1072 ; user code as if it were part of the frame setup when doing so.
; The function is `nounwind` and the expected output contains no .cfi
; directives. The body calls @llvm.aarch64.sme.cntsd and passes its result as
; the first argument of @bar; in the expected code the `rdsvl x8, #1` /
; `lsr x8, x8, #3` pair appears right after the VG spill (`str x0, [sp, #80]`)
; and the restore of the original argument (`mov x0, x9`).
1073 define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
1074 ; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
1075 ; NO-SVE-CHECK: // %bb.0:
1076 ; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
1077 ; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
1078 ; NO-SVE-CHECK-NEXT: mov x9, x0
1079 ; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
1080 ; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
1081 ; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
1082 ; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg
1083 ; NO-SVE-CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
1084 ; NO-SVE-CHECK-NEXT: mov x0, x9
1085 ; NO-SVE-CHECK-NEXT: rdsvl x8, #1
1086 ; NO-SVE-CHECK-NEXT: add x29, sp, #64
1087 ; NO-SVE-CHECK-NEXT: lsr x8, x8, #3
1088 ; NO-SVE-CHECK-NEXT: mov x1, x0
1089 ; NO-SVE-CHECK-NEXT: smstart sm
1090 ; NO-SVE-CHECK-NEXT: mov x0, x8
1091 ; NO-SVE-CHECK-NEXT: bl bar
1092 ; NO-SVE-CHECK-NEXT: smstop sm
1093 ; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
1094 ; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1095 ; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1096 ; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1097 ; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
1098 ; NO-SVE-CHECK-NEXT: ret
1099 %some_alloc = alloca i64, align 8
1100 %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
; The callee is made streaming by the call-site attribute, not by its declaration.
1101 call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
; External callee used by @test_rdsvl_right_after_prologue. The declaration
; carries no SME attribute; the caller adds "aarch64_pstate_sm_enabled" at the
; call site.
1105 declare void @bar(i64, i64)
1107 ; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
1108 ; if the function contains a streaming-mode change.
; This function uses attribute group #5, which sets "aarch64_pstate_sm_enabled"
; without any uwtable attribute. The expected output still contains the full
; set of .cfi directives, including `.cfi_offset vg` before `smstop sm` and
; `.cfi_restore vg` after `smstart sm`.
1110 define void @vg_unwind_noasync() #5 {
1111 ; CHECK-LABEL: vg_unwind_noasync:
1113 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
1114 ; CHECK-NEXT: .cfi_def_cfa_offset 80
1115 ; CHECK-NEXT: cntd x9
1116 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
1117 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
1118 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
1119 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
1120 ; CHECK-NEXT: .cfi_offset w30, -16
1121 ; CHECK-NEXT: .cfi_offset b8, -24
1122 ; CHECK-NEXT: .cfi_offset b9, -32
1123 ; CHECK-NEXT: .cfi_offset b10, -40
1124 ; CHECK-NEXT: .cfi_offset b11, -48
1125 ; CHECK-NEXT: .cfi_offset b12, -56
1126 ; CHECK-NEXT: .cfi_offset b13, -64
1127 ; CHECK-NEXT: .cfi_offset b14, -72
1128 ; CHECK-NEXT: .cfi_offset b15, -80
1129 ; CHECK-NEXT: .cfi_offset vg, -8
1130 ; CHECK-NEXT: smstop sm
1131 ; CHECK-NEXT: bl callee
1132 ; CHECK-NEXT: smstart sm
1133 ; CHECK-NEXT: .cfi_restore vg
1134 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1135 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
1136 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1137 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1138 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
1139 ; CHECK-NEXT: .cfi_def_cfa_offset 0
1140 ; CHECK-NEXT: .cfi_restore w30
1141 ; CHECK-NEXT: .cfi_restore b8
1142 ; CHECK-NEXT: .cfi_restore b9
1143 ; CHECK-NEXT: .cfi_restore b10
1144 ; CHECK-NEXT: .cfi_restore b11
1145 ; CHECK-NEXT: .cfi_restore b12
1146 ; CHECK-NEXT: .cfi_restore b13
1147 ; CHECK-NEXT: .cfi_restore b14
1148 ; CHECK-NEXT: .cfi_restore b15
; Expectations for the -frame-pointer=non-leaf run: the frame grows to 96 bytes,
; x29/x30 are saved as a pair, VG is stored separately at [sp, #80], and the CFA
; is defined relative to w29.
1151 ; FP-CHECK-LABEL: vg_unwind_noasync:
1152 ; FP-CHECK: // %bb.0:
1153 ; FP-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
1154 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 96
1155 ; FP-CHECK-NEXT: cntd x9
1156 ; FP-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
1157 ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
1158 ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
1159 ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
1160 ; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
1161 ; FP-CHECK-NEXT: add x29, sp, #64
1162 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32
1163 ; FP-CHECK-NEXT: .cfi_offset w30, -24
1164 ; FP-CHECK-NEXT: .cfi_offset w29, -32
1165 ; FP-CHECK-NEXT: .cfi_offset b8, -40
1166 ; FP-CHECK-NEXT: .cfi_offset b9, -48
1167 ; FP-CHECK-NEXT: .cfi_offset b10, -56
1168 ; FP-CHECK-NEXT: .cfi_offset b11, -64
1169 ; FP-CHECK-NEXT: .cfi_offset b12, -72
1170 ; FP-CHECK-NEXT: .cfi_offset b13, -80
1171 ; FP-CHECK-NEXT: .cfi_offset b14, -88
1172 ; FP-CHECK-NEXT: .cfi_offset b15, -96
1173 ; FP-CHECK-NEXT: .cfi_offset vg, -16
1174 ; FP-CHECK-NEXT: smstop sm
1175 ; FP-CHECK-NEXT: bl callee
1176 ; FP-CHECK-NEXT: smstart sm
1177 ; FP-CHECK-NEXT: .cfi_restore vg
1178 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96
1179 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
1180 ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1181 ; FP-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1182 ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1183 ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
1184 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0
1185 ; FP-CHECK-NEXT: .cfi_restore w30
1186 ; FP-CHECK-NEXT: .cfi_restore w29
1187 ; FP-CHECK-NEXT: .cfi_restore b8
1188 ; FP-CHECK-NEXT: .cfi_restore b9
1189 ; FP-CHECK-NEXT: .cfi_restore b10
1190 ; FP-CHECK-NEXT: .cfi_restore b11
1191 ; FP-CHECK-NEXT: .cfi_restore b12
1192 ; FP-CHECK-NEXT: .cfi_restore b13
1193 ; FP-CHECK-NEXT: .cfi_restore b14
1194 ; FP-CHECK-NEXT: .cfi_restore b15
1195 ; FP-CHECK-NEXT: ret
; With -enable-machine-outliner, the text OUTLINED_FUNCTION_ must not appear in
; the output for this function.
1196 ; OUTLINER-CHECK-LABEL: vg_unwind_noasync:
1197 ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_
1199 call void @callee();
; Attribute groups referenced by the functions and declarations in this file.
; #0: streaming function with async unwind tables (used by @vg_unwind_simple and
;     the @streaming_callee* declarations).
1203 attributes #0 = { "aarch64_pstate_sm_enabled" uwtable(async) }
; #1: same as #0 plus "probe-stack"="inline-asm".
1204 attributes #1 = { "probe-stack"="inline-asm" "aarch64_pstate_sm_enabled" uwtable(async) }
; #3: "aarch64_pstate_sm_body" with async unwind tables (used by
;     @vg_locally_streaming_fn).
1205 attributes #3 = { "aarch64_pstate_sm_body" uwtable(async) }
; #4: streaming-compatible function with async unwind tables (used by the
;     @streaming_compatible_* functions).
1206 attributes #4 = { "aarch64_pstate_sm_compatible" uwtable(async) }
; #5: streaming function with no uwtable attribute (used by @vg_unwind_noasync).
1207 attributes #5 = { "aarch64_pstate_sm_enabled" }