1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
4 ; This file tests the following combinations related to streaming-enabled functions:
5 ; [ ] N -> S (Normal -> Streaming)
6 ; [ ] S -> N (Streaming -> Normal)
7 ; [ ] S -> S (Streaming -> Streaming)
8 ; [ ] S -> SC (Streaming -> Streaming-compatible)
10 ; The following combination is tested in sme-streaming-compatible-interface.ll
11 ; [ ] SC -> S (Streaming-compatible -> Streaming)
; External callees used by the tests in this file, one per streaming interface:
; no attribute (normal), "aarch64_pstate_sm_enabled" (streaming) and
; "aarch64_pstate_sm_compatible" (streaming-compatible).
13 declare void @normal_callee()
14 declare void @streaming_callee() "aarch64_pstate_sm_enabled"
15 declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"
; N -> S: a caller without any streaming attribute calls @streaming_callee.
; Expected output: d8-d15 and x30 are spilled, the call is bracketed by
; 'smstart sm' before and 'smstop sm' after, then the registers are reloaded.
21 define void @normal_caller_streaming_callee() nounwind {
22 ; CHECK-LABEL: normal_caller_streaming_callee:
24 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
25 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
26 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
27 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
28 ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
29 ; CHECK-NEXT: smstart sm
30 ; CHECK-NEXT: bl streaming_callee
31 ; CHECK-NEXT: smstop sm
32 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
33 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
34 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
35 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
36 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
38 call void @streaming_callee()
; S -> N: a caller marked "aarch64_pstate_sm_enabled" calls @normal_callee.
; Expected output mirrors the N -> S case with the mode switches inverted:
; 'smstop sm' before the call and 'smstart sm' after it, with d8-d15 and x30
; spilled around the sequence.
46 define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
47 ; CHECK-LABEL: streaming_caller_normal_callee:
49 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
50 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
51 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
52 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
53 ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
54 ; CHECK-NEXT: smstop sm
55 ; CHECK-NEXT: bl normal_callee
56 ; CHECK-NEXT: smstart sm
57 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
58 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
59 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
60 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
61 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
63 call void @normal_callee()
; S -> S: caller and callee are both "aarch64_pstate_sm_enabled".
; Expected output contains no smstart/smstop: only x30 is saved around a
; plain 'bl streaming_callee'.
71 define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
72 ; CHECK-LABEL: streaming_caller_streaming_callee:
74 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
75 ; CHECK-NEXT: bl streaming_callee
76 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
78 call void @streaming_callee()
; S -> SC: a streaming caller calls a callee marked
; "aarch64_pstate_sm_compatible". As in the S -> S case, the expected output
; has no smstart/smstop; only x30 is saved around the call.
86 define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
87 ; CHECK-LABEL: streaming_caller_streaming_compatible_callee:
89 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
90 ; CHECK-NEXT: bl streaming_compatible_callee
91 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
93 call void @streaming_compatible_callee()
98 ; Handle special cases here.
101 ; Call to function-pointer (with attribute)
; The caller has no streaming attribute; the streaming interface comes solely
; from the "aarch64_pstate_sm_enabled" attribute on the indirect call site.
; Expected output: the same spill / 'smstart sm' / 'smstop sm' / reload
; sequence as for a direct N -> S call.
102 define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
103 ; CHECK-LABEL: call_to_function_pointer_streaming_enabled:
105 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
106 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
107 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
108 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
109 ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
110 ; CHECK-NEXT: smstart sm
112 ; CHECK-NEXT: smstop sm
113 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
114 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
115 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
116 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
117 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
119 call void %p() "aarch64_pstate_sm_enabled"
123 ; Ensure NEON registers are preserved correctly.
; A normal caller takes a <4 x i32> argument and calls @streaming_callee.
; Expected output: in addition to the d8-d15 / x30 spills, q0 is stored to
; the stack before 'smstart sm' and loaded back after 'smstop sm'.
124 define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
125 ; CHECK-LABEL: smstart_clobber_simdfp:
127 ; CHECK-NEXT: sub sp, sp, #96
128 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
129 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
130 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
131 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
132 ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
133 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
134 ; CHECK-NEXT: smstart sm
135 ; CHECK-NEXT: bl streaming_callee
136 ; CHECK-NEXT: smstop sm
137 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
138 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
139 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
140 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
141 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
142 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
143 ; CHECK-NEXT: add sp, sp, #96
145 call void @streaming_callee()
149 ; Ensure SVE registers are preserved correctly.
; Compiled with attribute group #0 (nounwind, +sve). The scalable-vector
; argument %x is returned unchanged after a call to @streaming_callee.
; Expected output: p4-p15 and z8-z23 are saved in the prologue, z0 is spilled
; to a separately allocated slot before 'smstart sm' and reloaded right after
; 'smstop sm', then the saved registers are restored.
150 define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) #0 {
151 ; CHECK-LABEL: smstart_clobber_sve:
153 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
154 ; CHECK-NEXT: addvl sp, sp, #-18
155 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
156 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
157 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
158 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
159 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
160 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
161 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
162 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
163 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
164 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
165 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
166 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
167 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
168 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
169 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
170 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
171 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
172 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
173 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
174 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
175 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
176 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
177 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
178 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
179 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
180 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
181 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
182 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
183 ; CHECK-NEXT: addvl sp, sp, #-1
184 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
185 ; CHECK-NEXT: smstart sm
186 ; CHECK-NEXT: bl streaming_callee
187 ; CHECK-NEXT: smstop sm
188 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
189 ; CHECK-NEXT: addvl sp, sp, #1
190 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
191 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
192 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
193 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
194 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
195 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
196 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
197 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
198 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
199 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
200 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
201 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
202 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
203 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
204 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
205 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
206 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
207 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
208 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
209 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
210 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
211 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
212 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
213 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
214 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
215 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
216 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
217 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
218 ; CHECK-NEXT: addvl sp, sp, #18
219 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
221 call void @streaming_callee()
222 ret <vscale x 4 x i32> %x;
225 ; Call streaming callee twice; there should be no spills/fills between the two
226 ; calls since the registers should have already been clobbered.
; Same setup as smstart_clobber_sve (attribute group #0), but with two
; consecutive calls. Expected output: z0 is spilled once before the first
; 'smstart sm' and reloaded once after the second 'smstop sm'; the first
; 'smstop sm' is followed directly by the second 'smstart sm'.
227 define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) #0 {
228 ; CHECK-LABEL: smstart_clobber_sve_duplicate:
230 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
231 ; CHECK-NEXT: addvl sp, sp, #-18
232 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
233 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
234 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
235 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
236 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
237 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
238 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
239 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
240 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
241 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
242 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
243 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
244 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
245 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
246 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
247 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
248 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
249 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
250 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
251 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
252 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
253 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
254 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
255 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
256 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
257 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
258 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
259 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
260 ; CHECK-NEXT: addvl sp, sp, #-1
261 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
262 ; CHECK-NEXT: smstart sm
263 ; CHECK-NEXT: bl streaming_callee
264 ; CHECK-NEXT: smstop sm
265 ; CHECK-NEXT: smstart sm
266 ; CHECK-NEXT: bl streaming_callee
267 ; CHECK-NEXT: smstop sm
268 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
269 ; CHECK-NEXT: addvl sp, sp, #1
270 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
271 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
272 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
273 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
274 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
275 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
276 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
277 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
278 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
279 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
280 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
281 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
282 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
283 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
284 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
285 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
286 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
287 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
288 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
289 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
290 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
291 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
292 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
293 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
294 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
295 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
296 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
297 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
298 ; CHECK-NEXT: addvl sp, sp, #18
299 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
301 call void @streaming_callee()
302 call void @streaming_callee()
303 ret <vscale x 4 x i32> %x;
306 ; Ensure smstart is not removed, because call to llvm.cos is not part of a chain.
; A streaming caller invokes the llvm.cos.f64 intrinsic and adds the result
; to its argument. Expected output: d0 is spilled before 'smstop sm' and
; reloaded after it, d0 is spilled again before 'smstart sm', and both stack
; slots are reloaded (ldp d1, d0) to feed the final fadd.
307 define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
308 ; CHECK-LABEL: call_to_intrinsic_without_chain:
309 ; CHECK: // %bb.0: // %entry
310 ; CHECK-NEXT: sub sp, sp, #96
311 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
312 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
313 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
314 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
315 ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
316 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
317 ; CHECK-NEXT: smstop sm
318 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
320 ; CHECK-NEXT: str d0, [sp] // 8-byte Folded Spill
321 ; CHECK-NEXT: smstart sm
322 ; CHECK-NEXT: ldp d1, d0, [sp] // 16-byte Folded Reload
323 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
324 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
325 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
326 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
327 ; CHECK-NEXT: fadd d0, d1, d0
328 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
329 ; CHECK-NEXT: add sp, sp, #96
332 %res = call fast double @llvm.cos.f64(double %x)
333 %res.fadd = fadd fast double %res, %x
; Intrinsic called by @call_to_intrinsic_without_chain.
337 declare double @llvm.cos.f64(double)
339 ; Ensure that tail call optimization is disabled when the streaming mode
; differs between caller and callee. Here a normal caller issues a
; 'tail call' to @streaming_callee; the expected output still uses
; 'bl streaming_callee' bracketed by 'smstart sm' / 'smstop sm', followed by
; the register reloads, instead of a tail-call branch.
341 define void @disable_tailcallopt() nounwind {
342 ; CHECK-LABEL: disable_tailcallopt:
344 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
345 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
346 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
347 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
348 ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
349 ; CHECK-NEXT: smstart sm
350 ; CHECK-NEXT: bl streaming_callee
351 ; CHECK-NEXT: smstop sm
352 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
353 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
354 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
355 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
356 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
358 tail call void @streaming_callee()
; A streaming caller (attribute group #1) allocates three scalable-vector
; stack objects and passes their addresses, plus the result of
; llvm.aarch64.sme.cntsb, to the non-streaming function @foo; it then loads
; %Data1 and extracts element 0.
; Expected output: rdsvl and the addvl/mov address computations are emitted
; before 'smstop sm' into x8-x11, and only copied into the argument registers
; x0-x3 after the mode switch. The vector load (ptrue/ld1b) happens after
; 'smstart sm'.
362 define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #1 {
363 ; CHECK-LABEL: call_to_non_streaming_pass_sve_objects:
364 ; CHECK: // %bb.0: // %entry
365 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
366 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
367 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
368 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
369 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
370 ; CHECK-NEXT: addvl sp, sp, #-3
371 ; CHECK-NEXT: rdsvl x8, #1
372 ; CHECK-NEXT: addvl x9, sp, #2
373 ; CHECK-NEXT: addvl x10, sp, #1
374 ; CHECK-NEXT: mov x11, sp
375 ; CHECK-NEXT: smstop sm
376 ; CHECK-NEXT: mov x0, x9
377 ; CHECK-NEXT: mov x1, x10
378 ; CHECK-NEXT: mov x2, x11
379 ; CHECK-NEXT: mov x3, x8
381 ; CHECK-NEXT: smstart sm
382 ; CHECK-NEXT: ptrue p0.b
383 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp, #2, mul vl]
384 ; CHECK-NEXT: fmov w0, s0
385 ; CHECK-NEXT: addvl sp, sp, #3
386 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
387 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
388 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
389 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
390 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
393 %Data1 = alloca <vscale x 16 x i8>, align 16
394 %Data2 = alloca <vscale x 16 x i8>, align 16
395 %Data3 = alloca <vscale x 16 x i8>, align 16
396 %0 = tail call i64 @llvm.aarch64.sme.cntsb()
397 call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
398 %1 = load <vscale x 16 x i8>, ptr %Data1, align 16
399 %vecext = extractelement <vscale x 16 x i8> %1, i64 0
; Declarations used by @call_to_non_streaming_pass_sve_objects: the cntsb
; intrinsic and @foo, an external callee declared without streaming attributes.
403 declare i64 @llvm.aarch64.sme.cntsb()
405 declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
; #0: used by the smstart_clobber_sve* tests (non-streaming, SVE enabled).
; #1: used by @call_to_non_streaming_pass_sve_objects (streaming caller with a
;     bounded vscale range).
407 attributes #0 = { nounwind "target-features"="+sve" }
408 attributes #1 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }