1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
4 ; This file tests the following combinations related to streaming-enabled functions:
5 ; [ ] N -> S (Normal -> Streaming)
6 ; [ ] S -> N (Streaming -> Normal)
7 ; [ ] S -> S (Streaming -> Streaming)
8 ; [ ] S -> SC (Streaming -> Streaming-compatible)
10 ; The following combination is tested in sme-streaming-compatible-interface.ll
11 ; [ ] SC -> S (Streaming-compatible -> Streaming)
; External callees used by the tests in this file. They differ only in their
; streaming-mode attribute: none, "aarch64_pstate_sm_enabled", or
; "aarch64_pstate_sm_compatible".
13 declare void @normal_callee()
14 declare void @streaming_callee() "aarch64_pstate_sm_enabled"
15 declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"
; N -> S: a caller without any streaming attribute calls @streaming_callee.
; The checks expect the call to be bracketed by `smstart sm` / `smstop sm`,
; with d8-d15 and the x30/x9 pair stored before the mode switch and d8-d15
; and x30 reloaded after switching back.
21 define void @normal_caller_streaming_callee() nounwind {
22 ; CHECK-LABEL: normal_caller_streaming_callee:
24 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
26 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
27 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
28 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
29 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
30 ; CHECK-NEXT: smstart sm
31 ; CHECK-NEXT: bl streaming_callee
32 ; CHECK-NEXT: smstop sm
33 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
34 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
35 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
36 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
37 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
39 call void @streaming_callee()
; S -> N: a caller marked "aarch64_pstate_sm_enabled" calls @normal_callee.
; This mirrors the N -> S case: the checks expect `smstop sm` before the call
; and `smstart sm` after it, with the same d8-d15 and x30/x9 stores around
; the mode switch.
47 define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
48 ; CHECK-LABEL: streaming_caller_normal_callee:
50 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
52 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
53 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
54 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
55 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
56 ; CHECK-NEXT: smstop sm
57 ; CHECK-NEXT: bl normal_callee
58 ; CHECK-NEXT: smstart sm
59 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
60 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
61 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
62 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
63 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
65 call void @normal_callee()
; S -> S: caller and callee are both "aarch64_pstate_sm_enabled".
; No mode switch is expected: the checks contain no smstart/smstop, only the
; spill and reload of x30 around a plain `bl`.
73 define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
74 ; CHECK-LABEL: streaming_caller_streaming_callee:
76 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
77 ; CHECK-NEXT: bl streaming_callee
78 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
80 call void @streaming_callee()
; S -> SC: an "aarch64_pstate_sm_enabled" caller calls a callee marked
; "aarch64_pstate_sm_compatible". As in the S -> S case, the checks expect no
; smstart/smstop, only the spill and reload of x30 around a plain `bl`.
88 define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
89 ; CHECK-LABEL: streaming_caller_streaming_compatible_callee:
91 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
92 ; CHECK-NEXT: bl streaming_compatible_callee
93 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
95 call void @streaming_compatible_callee()
100 ; Handle special cases here.
103 ; Call to function-pointer (with attribute)
; The caller has no streaming attribute; "aarch64_pstate_sm_enabled" is placed
; on the indirect call site itself (`call void %p()`). The checks expect the
; same `smstart sm` / `smstop sm` bracketing and d8-d15, x30/x9 stores as for
; a direct N -> S call.
104 define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
105 ; CHECK-LABEL: call_to_function_pointer_streaming_enabled:
107 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
108 ; CHECK-NEXT: cntd x9
109 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
110 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
111 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
112 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
113 ; CHECK-NEXT: smstart sm
115 ; CHECK-NEXT: smstop sm
116 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
117 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
118 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
119 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
120 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
122 call void %p() "aarch64_pstate_sm_enabled"
126 ; Ensure NEON registers are preserved correctly.
; A non-streaming caller takes a <4 x i32> argument and calls
; @streaming_callee. The checks expect q0 to be stored to [sp] before
; `smstart sm` and reloaded from [sp] after `smstop sm`, in addition to the
; d8-d15 and x30/x9 stores seen in the plain N -> S case.
127 define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
128 ; CHECK-LABEL: smstart_clobber_simdfp:
130 ; CHECK-NEXT: sub sp, sp, #96
131 ; CHECK-NEXT: cntd x9
132 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
133 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
134 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
135 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
136 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
137 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
138 ; CHECK-NEXT: smstart sm
139 ; CHECK-NEXT: bl streaming_callee
140 ; CHECK-NEXT: smstop sm
141 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
142 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
143 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
144 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
145 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
146 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
147 ; CHECK-NEXT: add sp, sp, #96
149 call void @streaming_callee()
153 ; Ensure SVE registers are preserved correctly.
; A non-streaming caller takes a scalable-vector argument, calls
; @streaming_callee, and returns the argument unchanged. The checks expect:
;   - p4-p15 and z8-z23 to be stored in an `addvl sp, sp, #-18` area on entry
;     and reloaded before `addvl sp, sp, #18` on exit;
;   - z0 to be stored in one extra vector-length slot (`addvl sp, sp, #-1`)
;     before `smstart sm` and reloaded right after `smstop sm`.
154 define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind {
155 ; CHECK-LABEL: smstart_clobber_sve:
157 ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
158 ; CHECK-NEXT: cntd x9
159 ; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
160 ; CHECK-NEXT: addvl sp, sp, #-18
161 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
162 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
163 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
164 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
165 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
166 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
167 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
168 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
169 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
170 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
171 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
172 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
173 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
174 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
175 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
176 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
177 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
178 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
179 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
180 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
181 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
182 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
183 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
184 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
185 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
186 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
187 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
188 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
189 ; CHECK-NEXT: addvl sp, sp, #-1
190 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
191 ; CHECK-NEXT: smstart sm
192 ; CHECK-NEXT: bl streaming_callee
193 ; CHECK-NEXT: smstop sm
194 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
195 ; CHECK-NEXT: addvl sp, sp, #1
196 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
197 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
198 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
199 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
200 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
201 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
202 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
203 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
204 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
205 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
206 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
207 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
208 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
209 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
210 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
211 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
212 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
213 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
214 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
215 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
216 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
217 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
218 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
219 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
220 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
221 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
222 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
223 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
224 ; CHECK-NEXT: addvl sp, sp, #18
225 ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
227 call void @streaming_callee()
228 ret <vscale x 4 x i32> %x;
231 ; Call streaming callee twice; there should be no spills/fills between the two
232 ; calls since the registers should have already been clobbered.
; The prologue and epilogue checks are the same as in @smstart_clobber_sve.
; The part under test is the middle: the two `bl streaming_callee` lines are
; consecutive CHECK-NEXT lines, so nothing may appear between them, and both
; sit between the `smstart sm` and `smstop sm` lines that surround them.
233 define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) nounwind {
234 ; CHECK-LABEL: smstart_clobber_sve_duplicate:
236 ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
237 ; CHECK-NEXT: cntd x9
238 ; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
239 ; CHECK-NEXT: addvl sp, sp, #-18
240 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
241 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
242 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
243 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
244 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
245 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
246 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
247 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
248 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
249 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
250 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
251 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
252 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
253 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
254 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
255 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
256 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
257 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
258 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
259 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
260 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
261 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
262 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
263 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
264 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
265 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
266 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
267 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
268 ; CHECK-NEXT: addvl sp, sp, #-1
269 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
270 ; CHECK-NEXT: smstart sm
271 ; CHECK-NEXT: bl streaming_callee
272 ; CHECK-NEXT: bl streaming_callee
273 ; CHECK-NEXT: smstop sm
274 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
275 ; CHECK-NEXT: addvl sp, sp, #1
276 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
277 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
278 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
279 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
280 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
281 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
282 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
283 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
284 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
285 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
286 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
287 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
288 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
289 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
290 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
291 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
292 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
293 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
294 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
295 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
296 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
297 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
298 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
299 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
300 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
301 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
302 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
303 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
304 ; CHECK-NEXT: addvl sp, sp, #18
305 ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
307 call void @streaming_callee()
308 call void @streaming_callee()
309 ret <vscale x 4 x i32> %x;
312 ; Ensure smstart is not removed, because call to llvm.cos is not part of a chain.
; The caller is "aarch64_pstate_sm_enabled" and calls @llvm.cos.f64 on %x,
; then adds %x to the result. The checks expect:
;   - d0 to be written to two stack slots (`stp d0, d0, [sp]`) before
;     `smstop sm`, and one copy reloaded into d0 after the mode switch;
;   - the value then in d0 to be stored back to [sp] before `smstart sm`;
;   - both slots to be reloaded (`ldp d1, d0, [sp]`) to feed the `fadd`.
313 define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
314 ; CHECK-LABEL: call_to_intrinsic_without_chain:
315 ; CHECK: // %bb.0: // %entry
316 ; CHECK-NEXT: sub sp, sp, #96
317 ; CHECK-NEXT: cntd x9
318 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
319 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
320 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
321 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
322 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
323 ; CHECK-NEXT: stp d0, d0, [sp] // 16-byte Folded Spill
324 ; CHECK-NEXT: smstop sm
325 ; CHECK-NEXT: ldr d0, [sp] // 8-byte Folded Reload
327 ; CHECK-NEXT: str d0, [sp] // 8-byte Folded Spill
328 ; CHECK-NEXT: smstart sm
329 ; CHECK-NEXT: ldp d1, d0, [sp] // 16-byte Folded Reload
330 ; CHECK-NEXT: fadd d0, d1, d0
331 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
332 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
333 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
334 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
335 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
336 ; CHECK-NEXT: add sp, sp, #96
339 %res = call fast double @llvm.cos.f64(double %x)
340 %res.fadd = fadd fast double %res, %x
344 declare double @llvm.cos.f64(double)
346 ; Ensure that tail call optimization is disabled when the streaming mode
; differs between the caller and the callee.
; Here a non-streaming caller issues `tail call` to @streaming_callee. The
; checks expect an ordinary `bl streaming_callee` wrapped in `smstart sm` /
; `smstop sm`, followed by the register reloads, rather than a tail branch.
348 define void @disable_tailcallopt() nounwind {
349 ; CHECK-LABEL: disable_tailcallopt:
351 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
352 ; CHECK-NEXT: cntd x9
353 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
354 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
355 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
356 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
357 ; CHECK-NEXT: smstart sm
358 ; CHECK-NEXT: bl streaming_callee
359 ; CHECK-NEXT: smstop sm
360 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
361 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
362 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
363 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
364 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
366 tail call void @streaming_callee()
; A function using attribute group #0 allocates three <vscale x 16 x i8>
; stack objects and passes their addresses, plus the result of
; @llvm.aarch64.sme.cntsb, to @foo. It then loads %Data1 and extracts
; element 0.
; The checks expect:
;   - `addvl sp, sp, #-3` to reserve the three objects;
;   - `rdsvl x3, #1` for the cntsb value, and x0-x2 set to the object
;     addresses, all before `smstop sm`;
;   - after `smstart sm`, an `ld1b` from [sp, #2, mul vl], the same offset
;     that was passed in x0.
370 define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 {
371 ; CHECK-LABEL: call_to_non_streaming_pass_sve_objects:
372 ; CHECK: // %bb.0: // %entry
373 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
374 ; CHECK-NEXT: cntd x9
375 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
376 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
377 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
378 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
379 ; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
380 ; CHECK-NEXT: addvl sp, sp, #-3
381 ; CHECK-NEXT: rdsvl x3, #1
382 ; CHECK-NEXT: addvl x0, sp, #2
383 ; CHECK-NEXT: addvl x1, sp, #1
384 ; CHECK-NEXT: mov x2, sp
385 ; CHECK-NEXT: smstop sm
387 ; CHECK-NEXT: smstart sm
388 ; CHECK-NEXT: ptrue p0.b
389 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp, #2, mul vl]
390 ; CHECK-NEXT: fmov w0, s0
391 ; CHECK-NEXT: addvl sp, sp, #3
392 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
393 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
394 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
395 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
396 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
399 %Data1 = alloca <vscale x 16 x i8>, align 16
400 %Data2 = alloca <vscale x 16 x i8>, align 16
401 %Data3 = alloca <vscale x 16 x i8>, align 16
402 %0 = tail call i64 @llvm.aarch64.sme.cntsb()
403 call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
404 %1 = load <vscale x 16 x i8>, ptr %Data1, align 16
405 %vecext = extractelement <vscale x 16 x i8> %1, i64 0
; A function using attribute group #0 forwards a mix of pointer, integer,
; float and double arguments unchanged to @bar.
; The checks expect the floating-point arguments (s0, s1, d2, d3) to be
; stored to the stack before `smstop sm` and reloaded from the same slots
; immediately after it. No store/reload of the integer or pointer arguments
; appears in the checks.
409 define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2) #0 {
410 ; CHECK-LABEL: call_to_non_streaming_pass_args:
411 ; CHECK: // %bb.0: // %entry
412 ; CHECK-NEXT: sub sp, sp, #112
413 ; CHECK-NEXT: cntd x9
414 ; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
415 ; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
416 ; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
417 ; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
418 ; CHECK-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill
419 ; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill
420 ; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
421 ; CHECK-NEXT: smstop sm
422 ; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
423 ; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload
425 ; CHECK-NEXT: smstart sm
426 ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
427 ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
428 ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
429 ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
430 ; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
431 ; CHECK-NEXT: add sp, sp, #112
434 call void @bar(ptr noundef nonnull %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2)
; Declarations for the call_to_non_streaming_pass_* tests: the cntsb
; intrinsic and the two external callees @foo and @bar, which are declared
; without any streaming attribute.
438 declare i64 @llvm.aarch64.sme.cntsb()
440 declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
441 declare void @bar(ptr noundef, i64 noundef, i64 noundef, i32 noundef, i32 noundef, float noundef, float noundef, double noundef, double noundef)
; Attribute group #0: nounwind, streaming-enabled, vscale_range(1,16).
443 attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }