1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sme < %s | FileCheck %s
4 ; This file tests the following combinations related to streaming-enabled functions:
5 ; [ ] N -> SC (Normal -> Streaming-compatible)
6 ; [ ] SC -> N (Streaming-compatible -> Normal)
7 ; [ ] SC -> S (Streaming-compatible -> Streaming)
8 ; [ ] SC -> SC (Streaming-compatible -> Streaming-compatible)
10 ; The following combination is tested in sme-streaming-interface.ll
11 ; [ ] S -> SC (Streaming -> Streaming-compatible)
; Callee declared without any streaming attribute (the "normal" callee).
13 declare void @normal_callee();
; Callee declared with "aarch64_pstate_sm_enabled" (the "streaming" callee).
14 declare void @streaming_callee() "aarch64_pstate_sm_enabled";
; Callee declared with "aarch64_pstate_sm_compatible" (the
; "streaming-compatible" callee).
15 declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
17 ; [x] N -> SC (Normal -> Streaming-compatible)
18 ; [ ] SC -> N (Streaming-compatible -> Normal)
19 ; [ ] SC -> S (Streaming-compatible -> Streaming)
20 ; [ ] SC -> SC (Streaming-compatible -> Streaming-compatible)
; The caller carries no streaming attribute and calls the streaming-compatible
; callee. The expected code is an ordinary call: between the x30 spill and its
; reload the only instruction asserted is `bl streaming_compatible_callee`,
; with no __arm_sme_state query and no smstart/smstop.
21 define void @normal_caller_streaming_compatible_callee() nounwind {
22 ; CHECK-LABEL: normal_caller_streaming_compatible_callee:
24 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
25 ; CHECK-NEXT: bl streaming_compatible_callee
26 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
28 call void @streaming_compatible_callee();
32 ; [ ] N -> SC (Normal -> Streaming-compatible)
33 ; [x] SC -> N (Streaming-compatible -> Normal)
34 ; [ ] SC -> S (Streaming-compatible -> Streaming)
35 ; [ ] SC -> SC (Streaming-compatible -> Streaming-compatible)
; A streaming-compatible caller calls the normal callee. Expected sequence:
;   * spill d8-d15 and x30/x19;
;   * call __arm_sme_state and keep bit 0 of its x0 result in x19
;     (NOTE(review): bit 0 is PSTATE.SM per the Arm SME ABI - confirm);
;   * `tbz` skips `smstop sm` when that bit is clear, so streaming mode is
;     only left when the bit is set;
;   * call normal_callee;
;   * `tbz` on the same saved bit skips `smstart sm`, so streaming mode is
;     only re-entered when it was left before the call;
;   * reload the spilled registers in reverse order.
36 define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" nounwind {
37 ; CHECK-LABEL: streaming_compatible_caller_normal_callee:
39 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
40 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
41 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
42 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
43 ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
44 ; CHECK-NEXT: bl __arm_sme_state
45 ; CHECK-NEXT: and x19, x0, #0x1
46 ; CHECK-NEXT: tbz w19, #0, .LBB1_2
47 ; CHECK-NEXT: // %bb.1:
48 ; CHECK-NEXT: smstop sm
49 ; CHECK-NEXT: .LBB1_2:
50 ; CHECK-NEXT: bl normal_callee
51 ; CHECK-NEXT: tbz w19, #0, .LBB1_4
52 ; CHECK-NEXT: // %bb.3:
53 ; CHECK-NEXT: smstart sm
54 ; CHECK-NEXT: .LBB1_4:
55 ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
56 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
57 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
58 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
59 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
62 call void @normal_callee();
66 ; Streaming Compatible Caller, Streaming Callee
68 ; [ ] N -> SC (Normal -> Streaming-compatible)
69 ; [ ] SC -> N (Streaming-compatible -> Normal)
70 ; [x] SC -> S (Streaming-compatible -> Streaming)
71 ; [ ] SC -> SC (Streaming-compatible -> Streaming-compatible)
; A streaming-compatible caller calls the streaming callee. This mirrors the
; normal-callee case with the branch sense and the mode instructions swapped:
;   * bit 0 of the __arm_sme_state result is saved in x19;
;   * `tbnz` skips `smstart sm` when the bit is already set;
;   * after `bl streaming_callee`, `tbnz` skips `smstop sm` when the bit was
;     set, so streaming mode is only left if it was entered for the call.
72 define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_compatible" nounwind {
73 ; CHECK-LABEL: streaming_compatible_caller_streaming_callee:
75 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
76 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
77 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
78 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
79 ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
80 ; CHECK-NEXT: bl __arm_sme_state
81 ; CHECK-NEXT: and x19, x0, #0x1
82 ; CHECK-NEXT: tbnz w19, #0, .LBB2_2
83 ; CHECK-NEXT: // %bb.1:
84 ; CHECK-NEXT: smstart sm
85 ; CHECK-NEXT: .LBB2_2:
86 ; CHECK-NEXT: bl streaming_callee
87 ; CHECK-NEXT: tbnz w19, #0, .LBB2_4
88 ; CHECK-NEXT: // %bb.3:
89 ; CHECK-NEXT: smstop sm
90 ; CHECK-NEXT: .LBB2_4:
91 ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
92 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
93 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
94 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
95 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
98 call void @streaming_callee();
102 ; [ ] N -> SC (Normal -> Streaming-compatible)
103 ; [ ] SC -> N (Streaming-compatible -> Normal)
104 ; [ ] SC -> S (Streaming-compatible -> Streaming)
105 ; [x] SC -> SC (Streaming-compatible -> Streaming-compatible)
; Both caller and callee are streaming-compatible. As in the normal-caller
; case, the expected code is just the x30 spill, the `bl`, and the x30 reload:
; no __arm_sme_state query and no smstart/smstop are asserted.
106 define void @streaming_compatible_caller_and_callee() "aarch64_pstate_sm_compatible" nounwind {
107 ; CHECK-LABEL: streaming_compatible_caller_and_callee:
109 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
110 ; CHECK-NEXT: bl streaming_compatible_callee
111 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
114 call void @streaming_compatible_callee();
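120 ; Special cases: vector and predicate values live across the mode change, unusual control flow, tail calls, and argument passing.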
; A fixed-width <2 x double> value is live across the conditional mode change:
; %arg is passed to the normal callee and used again by the fadd afterwards.
; Expected handling, as asserted below:
;   * two vector-length-sized stack slots are reserved (`addvl sp, sp, #-2`);
;   * q0 is treated as z0 and spilled to slot #1 before __arm_sme_state is
;     called, then reloaded after the conditional `smstop sm` and before the
;     call;
;   * the call result is spilled to slot #0 before the conditional
;     `smstart sm`;
;   * both values are reloaded afterwards and the add is emitted as a
;     predicated SVE fadd under `ptrue p0.d, vl2`.
123 define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "aarch64_pstate_sm_compatible" nounwind {
124 ; CHECK-LABEL: streaming_compatible_with_neon_vectors:
126 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
127 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
128 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
129 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
130 ; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
131 ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
132 ; CHECK-NEXT: addvl sp, sp, #-2
133 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
134 ; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
135 ; CHECK-NEXT: bl __arm_sme_state
136 ; CHECK-NEXT: and x19, x0, #0x1
137 ; CHECK-NEXT: tbz w19, #0, .LBB4_2
138 ; CHECK-NEXT: // %bb.1:
139 ; CHECK-NEXT: smstop sm
140 ; CHECK-NEXT: .LBB4_2:
141 ; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
142 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
143 ; CHECK-NEXT: bl normal_callee_vec_arg
144 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
145 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
146 ; CHECK-NEXT: tbz w19, #0, .LBB4_4
147 ; CHECK-NEXT: // %bb.3:
148 ; CHECK-NEXT: smstart sm
149 ; CHECK-NEXT: .LBB4_4:
150 ; CHECK-NEXT: ptrue p0.d, vl2
151 ; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
152 ; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload
153 ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
154 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
155 ; CHECK-NEXT: addvl sp, sp, #2
156 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
157 ; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
158 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
159 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
160 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
161 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
163 %res = call <2 x double> @normal_callee_vec_arg(<2 x double> %arg)
164 %fadd = fadd <2 x double> %res, %arg
165 ret <2 x double> %fadd
; Callee without streaming attributes that takes and returns <2 x double>;
; called by streaming_compatible_with_neon_vectors.
167 declare <2 x double> @normal_callee_vec_arg(<2 x double>)
; A scalable <vscale x 2 x double> value is live across the conditional mode
; change: %arg is passed to the normal callee and used again by the fadd.
; Expected handling, as asserted below:
;   * x29, x30 and x19 are spilled, then an 18-vector-length area
;     (`addvl sp, sp, #-18`) receives p4-p15 and z8-z23;
;   * a further 2-vector-length area is reserved; z0 (%arg) goes to slot #1
;     before __arm_sme_state is called and is reloaded after the conditional
;     `smstop sm`;
;   * the call result in z0 is spilled to slot #0 before the conditional
;     `smstart sm`;
;   * both values are reloaded and added with an unpredicated SVE fadd, after
;     which the p/z registers and x30/x19/x29 are restored.
169 define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale x 2 x double> %arg) "aarch64_pstate_sm_compatible" nounwind {
170 ; CHECK-LABEL: streaming_compatible_with_scalable_vectors:
172 ; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
173 ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
174 ; CHECK-NEXT: addvl sp, sp, #-18
175 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
176 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
177 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
178 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
179 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
180 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
181 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
182 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
183 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
184 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
185 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
186 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
187 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
188 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
189 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
190 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
191 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
192 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
193 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
194 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
195 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
196 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
197 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
198 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
199 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
200 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
201 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
202 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
203 ; CHECK-NEXT: addvl sp, sp, #-2
204 ; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
205 ; CHECK-NEXT: bl __arm_sme_state
206 ; CHECK-NEXT: and x19, x0, #0x1
207 ; CHECK-NEXT: tbz w19, #0, .LBB5_2
208 ; CHECK-NEXT: // %bb.1:
209 ; CHECK-NEXT: smstop sm
210 ; CHECK-NEXT: .LBB5_2:
211 ; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
212 ; CHECK-NEXT: bl normal_callee_scalable_vec_arg
213 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
214 ; CHECK-NEXT: tbz w19, #0, .LBB5_4
215 ; CHECK-NEXT: // %bb.3:
216 ; CHECK-NEXT: smstart sm
217 ; CHECK-NEXT: .LBB5_4:
218 ; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
219 ; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload
220 ; CHECK-NEXT: fadd z0.d, z1.d, z0.d
221 ; CHECK-NEXT: addvl sp, sp, #2
222 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
223 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
224 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
225 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
226 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
227 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
228 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
229 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
230 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
231 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
232 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
233 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
234 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
235 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
236 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
237 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
238 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
239 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
240 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
241 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
242 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
243 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
244 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
245 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
246 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
247 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
248 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
249 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
250 ; CHECK-NEXT: addvl sp, sp, #18
251 ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
252 ; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
254 %res = call <vscale x 2 x double> @normal_callee_scalable_vec_arg(<vscale x 2 x double> %arg)
255 %fadd = fadd <vscale x 2 x double> %res, %arg
256 ret <vscale x 2 x double> %fadd
; Callee without streaming attributes that takes and returns
; <vscale x 2 x double>; called by streaming_compatible_with_scalable_vectors.
259 declare <vscale x 2 x double> @normal_callee_scalable_vec_arg(<vscale x 2 x double>)
; A scalable predicate <vscale x 2 x i1> value is live across the conditional
; mode change: %arg is passed to the normal callee and used again by the `and`.
; Expected handling, as asserted below:
;   * the prologue matches the scalable-vector test (x29/x30/x19, then p4-p15
;     and z8-z23 in an 18-vector-length area);
;   * one extra vector-length area is reserved (`addvl sp, sp, #-1`); p0 (%arg)
;     is stored at predicate slot #7 before __arm_sme_state is called and is
;     reloaded after the conditional `smstop sm`;
;   * the call result in p0 is stored at predicate slot #6 before the
;     conditional `smstart sm`;
;   * both predicates are reloaded and combined with a predicated SVE `and`.
261 define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x 2 x i1> %arg) "aarch64_pstate_sm_compatible" nounwind {
262 ; CHECK-LABEL: streaming_compatible_with_predicate_vectors:
264 ; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
265 ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
266 ; CHECK-NEXT: addvl sp, sp, #-18
267 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
268 ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
269 ; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
270 ; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
271 ; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
272 ; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
273 ; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
274 ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
275 ; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
276 ; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
277 ; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
278 ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
279 ; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
280 ; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
281 ; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
282 ; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
283 ; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
284 ; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
285 ; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
286 ; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
287 ; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
288 ; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
289 ; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
290 ; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
291 ; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
292 ; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
293 ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
294 ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
295 ; CHECK-NEXT: addvl sp, sp, #-1
296 ; CHECK-NEXT: str p0, [sp, #7, mul vl] // 2-byte Folded Spill
297 ; CHECK-NEXT: bl __arm_sme_state
298 ; CHECK-NEXT: and x19, x0, #0x1
299 ; CHECK-NEXT: tbz w19, #0, .LBB6_2
300 ; CHECK-NEXT: // %bb.1:
301 ; CHECK-NEXT: smstop sm
302 ; CHECK-NEXT: .LBB6_2:
303 ; CHECK-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
304 ; CHECK-NEXT: bl normal_callee_predicate_vec_arg
305 ; CHECK-NEXT: str p0, [sp, #6, mul vl] // 2-byte Folded Spill
306 ; CHECK-NEXT: tbz w19, #0, .LBB6_4
307 ; CHECK-NEXT: // %bb.3:
308 ; CHECK-NEXT: smstart sm
309 ; CHECK-NEXT: .LBB6_4:
310 ; CHECK-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
311 ; CHECK-NEXT: ldr p1, [sp, #6, mul vl] // 2-byte Folded Reload
312 ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
313 ; CHECK-NEXT: addvl sp, sp, #1
314 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
315 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
316 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
317 ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
318 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
319 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
320 ; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
321 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
322 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
323 ; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
324 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
325 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
326 ; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
327 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
328 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
329 ; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
330 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
331 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
332 ; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
333 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
334 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
335 ; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
336 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
337 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
338 ; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
339 ; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
340 ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
341 ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
342 ; CHECK-NEXT: addvl sp, sp, #18
343 ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
344 ; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
346 %res = call <vscale x 2 x i1> @normal_callee_predicate_vec_arg(<vscale x 2 x i1> %arg)
347 %and = and <vscale x 2 x i1> %res, %arg
348 ret <vscale x 2 x i1> %and
; Callee without streaming attributes that takes and returns
; <vscale x 2 x i1>; called by streaming_compatible_with_predicate_vectors.
351 declare <vscale x 2 x i1> @normal_callee_predicate_vec_arg(<vscale x 2 x i1>)
; A streaming-compatible caller calls the streaming callee, and only the
; entry half of the mode switch is asserted: the __arm_sme_state query, the
; saved bit in x19, and the `tbnz`-guarded `smstart sm` before the call.
; The assertions stop at `bl streaming_callee`; no conditional `smstop sm`
; and no callee-save reloads are listed after it.
; NOTE(review): going by the function name, the block presumably ends in a
; terminator that never returns - confirm against the full IR body.
353 define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatible" nounwind {
354 ; CHECK-LABEL: conditional_smstart_unreachable_block:
356 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
357 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
358 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
359 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
360 ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
361 ; CHECK-NEXT: bl __arm_sme_state
362 ; CHECK-NEXT: and x19, x0, #0x1
363 ; CHECK-NEXT: tbnz w19, #0, .LBB7_2
364 ; CHECK-NEXT: // %bb.1:
365 ; CHECK-NEXT: smstart sm
366 ; CHECK-NEXT: .LBB7_2:
367 ; CHECK-NEXT: bl streaming_callee
368 call void @streaming_callee()
; The call to the streaming callee sits behind a branch on %p.
; Expected code, as asserted below:
;   * `tbz w0, #0` jumps straight to the %exit label when %p is false;
;   * the callee-save spills, the __arm_sme_state query and the whole
;     conditional smstart / call / conditional smstop sequence are all
;     attributed to %if.then, i.e. they only run on the path that makes the
;     call;
;   * the reloads come before the %exit label.
372 define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_compatible" nounwind {
373 ; CHECK-LABEL: conditional_smstart_no_successor_block:
375 ; CHECK-NEXT: tbz w0, #0, .LBB8_6
376 ; CHECK-NEXT: // %bb.1: // %if.then
377 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
378 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
379 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
380 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
381 ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
382 ; CHECK-NEXT: bl __arm_sme_state
383 ; CHECK-NEXT: and x19, x0, #0x1
384 ; CHECK-NEXT: tbnz w19, #0, .LBB8_3
385 ; CHECK-NEXT: // %bb.2: // %if.then
386 ; CHECK-NEXT: smstart sm
387 ; CHECK-NEXT: .LBB8_3: // %if.then
388 ; CHECK-NEXT: bl streaming_callee
389 ; CHECK-NEXT: tbnz w19, #0, .LBB8_5
390 ; CHECK-NEXT: // %bb.4: // %if.then
391 ; CHECK-NEXT: smstop sm
392 ; CHECK-NEXT: .LBB8_5: // %if.then
393 ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
394 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
395 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
396 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
397 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
398 ; CHECK-NEXT: .LBB8_6: // %exit
400 br i1 %p, label %if.then, label %exit
403 call void @streaming_callee()
; The IR marks the call to the normal callee as `tail call`, but the expected
; code still uses `bl normal_callee`, followed by the conditional
; `smstart sm` and the register reloads. The call is therefore not emitted
; as a tail call, since the mode restore has to run after the callee returns.
410 define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
411 ; CHECK-LABEL: disable_tailcallopt:
413 ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
414 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
415 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
416 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
417 ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
418 ; CHECK-NEXT: bl __arm_sme_state
419 ; CHECK-NEXT: and x19, x0, #0x1
420 ; CHECK-NEXT: tbz w19, #0, .LBB9_2
421 ; CHECK-NEXT: // %bb.1:
422 ; CHECK-NEXT: smstop sm
423 ; CHECK-NEXT: .LBB9_2:
424 ; CHECK-NEXT: bl normal_callee
425 ; CHECK-NEXT: tbz w19, #0, .LBB9_4
426 ; CHECK-NEXT: // %bb.3:
427 ; CHECK-NEXT: smstart sm
428 ; CHECK-NEXT: .LBB9_4:
429 ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
430 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
431 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
432 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
433 ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
436 tail call void @normal_callee();
; A streaming-compatible caller forwards all nine of its arguments to @bar,
; which carries no streaming attribute. Unlike the other functions in this
; file, this one is not marked nounwind, and the assertions include
; .cfi_def_cfa_offset / .cfi_offset directives after the prologue spills.
; Expected argument handling around the mode switch, as asserted below:
;   * d2/d3 and s0/s1 are stored to the stack before __arm_sme_state is
;     called and reloaded after the conditional `smstop sm`;
;   * x1 and x0 are copied to x8 and x9 before the __arm_sme_state call and
;     copied back afterwards (its result arrives in x0, which the following
;     `and x19, x0, #0x1` reads);
;   * the usual `tbz`-guarded `smstart sm` and the reloads follow.
440 define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2) "aarch64_pstate_sm_compatible" {
441 ; CHECK-LABEL: call_to_non_streaming_pass_args:
442 ; CHECK: // %bb.0: // %entry
443 ; CHECK-NEXT: sub sp, sp, #112
444 ; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
445 ; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
446 ; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
447 ; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
448 ; CHECK-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill
449 ; CHECK-NEXT: .cfi_def_cfa_offset 112
450 ; CHECK-NEXT: .cfi_offset w19, -8
451 ; CHECK-NEXT: .cfi_offset w30, -16
452 ; CHECK-NEXT: .cfi_offset b8, -24
453 ; CHECK-NEXT: .cfi_offset b9, -32
454 ; CHECK-NEXT: .cfi_offset b10, -40
455 ; CHECK-NEXT: .cfi_offset b11, -48
456 ; CHECK-NEXT: .cfi_offset b12, -56
457 ; CHECK-NEXT: .cfi_offset b13, -64
458 ; CHECK-NEXT: .cfi_offset b14, -72
459 ; CHECK-NEXT: .cfi_offset b15, -80
460 ; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill
461 ; CHECK-NEXT: mov x8, x1
462 ; CHECK-NEXT: mov x9, x0
463 ; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
464 ; CHECK-NEXT: bl __arm_sme_state
465 ; CHECK-NEXT: and x19, x0, #0x1
466 ; CHECK-NEXT: tbz w19, #0, .LBB10_2
467 ; CHECK-NEXT: // %bb.1: // %entry
468 ; CHECK-NEXT: smstop sm
469 ; CHECK-NEXT: .LBB10_2: // %entry
470 ; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
471 ; CHECK-NEXT: mov x0, x9
472 ; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload
473 ; CHECK-NEXT: mov x1, x8
475 ; CHECK-NEXT: tbz w19, #0, .LBB10_4
476 ; CHECK-NEXT: // %bb.3: // %entry
477 ; CHECK-NEXT: smstart sm
478 ; CHECK-NEXT: .LBB10_4: // %entry
479 ; CHECK-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
480 ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
481 ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
482 ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
483 ; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
484 ; CHECK-NEXT: add sp, sp, #112
487 call void @bar(ptr noundef nonnull %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2)
; Callee without streaming attributes that takes a pointer, two i64, two i32,
; two float and two double arguments; called by call_to_non_streaming_pass_args.
491 declare void @bar(ptr noundef, i64 noundef, i64 noundef, i32 noundef, i32 noundef, float noundef, float noundef, double noundef, double noundef)