; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -O0 -mattr=+sme < %s | FileCheck %s
4 target triple = "aarch64-linux-gnu"
6 declare void @llvm.trap() #0
; This test checks that we don't assert/crash due to not being able to reach the
; emergency spill slot, by ensuring that we use a base pointer (BP) for streaming
; functions with variable-sized stack objects.
11 define void @quux() #1 {
13 ; CHECK: // %bb.0: // %prelude
14 ; CHECK-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
15 ; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
16 ; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
17 ; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
18 ; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
19 ; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
20 ; CHECK-NEXT: mov x29, sp
21 ; CHECK-NEXT: sub sp, sp, #384
22 ; CHECK-NEXT: addvl sp, sp, #-1
23 ; CHECK-NEXT: mov x19, sp
24 ; CHECK-NEXT: .cfi_def_cfa w29, 96
25 ; CHECK-NEXT: .cfi_offset w19, -8
26 ; CHECK-NEXT: .cfi_offset w20, -16
27 ; CHECK-NEXT: .cfi_offset w21, -24
28 ; CHECK-NEXT: .cfi_offset w22, -32
29 ; CHECK-NEXT: .cfi_offset w23, -40
30 ; CHECK-NEXT: .cfi_offset w24, -48
31 ; CHECK-NEXT: .cfi_offset w25, -56
32 ; CHECK-NEXT: .cfi_offset w26, -64
33 ; CHECK-NEXT: .cfi_offset w27, -72
34 ; CHECK-NEXT: .cfi_offset w28, -80
35 ; CHECK-NEXT: .cfi_offset w30, -88
36 ; CHECK-NEXT: .cfi_offset w29, -96
37 ; CHECK-NEXT: rdsvl x8, #1
38 ; CHECK-NEXT: mrs x8, TPIDR2_EL0
39 ; CHECK-NEXT: cbz x8, .LBB0_2
40 ; CHECK-NEXT: b .LBB0_1
41 ; CHECK-NEXT: .LBB0_1: // %save.za
42 ; CHECK-NEXT: bl __arm_tpidr2_save
43 ; CHECK-NEXT: mov x8, xzr
44 ; CHECK-NEXT: msr TPIDR2_EL0, x8
45 ; CHECK-NEXT: b .LBB0_2
46 ; CHECK-NEXT: .LBB0_2: // %bb
47 ; CHECK-NEXT: smstart za
48 ; CHECK-NEXT: zero {za}
49 ; CHECK-NEXT: mov w9, #15 // =0xf
50 ; CHECK-NEXT: // implicit-def: $x8
51 ; CHECK-NEXT: mov w8, w9
52 ; CHECK-NEXT: mov x9, x8
54 ; CHECK-NEXT: mov w0, w9
55 ; CHECK-NEXT: // implicit-def: $x9
56 ; CHECK-NEXT: mov w9, w0
57 ; CHECK-NEXT: and x14, x9, #0x70
58 ; CHECK-NEXT: str x14, [x19, #16] // 8-byte Folded Spill
59 ; CHECK-NEXT: mov x9, sp
60 ; CHECK-NEXT: subs x9, x9, x14
61 ; CHECK-NEXT: mov sp, x9
62 ; CHECK-NEXT: str x9, [x19, #24] // 8-byte Folded Spill
63 ; CHECK-NEXT: mov x9, sp
64 ; CHECK-NEXT: subs x9, x9, x14
65 ; CHECK-NEXT: mov sp, x9
66 ; CHECK-NEXT: str x9, [x19, #32] // 8-byte Folded Spill
67 ; CHECK-NEXT: addvl x9, x8, #1
68 ; CHECK-NEXT: mov w0, w9
69 ; CHECK-NEXT: // implicit-def: $x9
70 ; CHECK-NEXT: mov w9, w0
71 ; CHECK-NEXT: and x10, x9, #0x3f0
72 ; CHECK-NEXT: str x10, [x19, #40] // 8-byte Folded Spill
73 ; CHECK-NEXT: mov x9, sp
74 ; CHECK-NEXT: subs x9, x9, x10
75 ; CHECK-NEXT: mov sp, x9
76 ; CHECK-NEXT: str x9, [x19, #48] // 8-byte Folded Spill
77 ; CHECK-NEXT: mov x9, sp
78 ; CHECK-NEXT: subs x9, x9, x10
79 ; CHECK-NEXT: mov sp, x9
80 ; CHECK-NEXT: str x9, [x19, #56] // 8-byte Folded Spill
81 ; CHECK-NEXT: mov x9, sp
82 ; CHECK-NEXT: subs x9, x9, x14
83 ; CHECK-NEXT: mov sp, x9
84 ; CHECK-NEXT: str x9, [x19, #64] // 8-byte Folded Spill
85 ; CHECK-NEXT: mov x9, sp
86 ; CHECK-NEXT: subs x9, x9, x14
87 ; CHECK-NEXT: mov sp, x9
88 ; CHECK-NEXT: str x9, [x19, #72] // 8-byte Folded Spill
89 ; CHECK-NEXT: mov x9, sp
90 ; CHECK-NEXT: subs x9, x9, x10
91 ; CHECK-NEXT: mov sp, x9
92 ; CHECK-NEXT: str x9, [x19, #80] // 8-byte Folded Spill
93 ; CHECK-NEXT: mov x9, sp
94 ; CHECK-NEXT: subs x9, x9, x10
95 ; CHECK-NEXT: mov sp, x9
96 ; CHECK-NEXT: str x9, [x19, #88] // 8-byte Folded Spill
97 ; CHECK-NEXT: mov x9, sp
98 ; CHECK-NEXT: subs x9, x9, x14
99 ; CHECK-NEXT: mov sp, x9
100 ; CHECK-NEXT: str x9, [x19, #96] // 8-byte Folded Spill
101 ; CHECK-NEXT: mov x9, sp
102 ; CHECK-NEXT: subs x9, x9, x14
103 ; CHECK-NEXT: mov sp, x9
104 ; CHECK-NEXT: str x9, [x19, #104] // 8-byte Folded Spill
105 ; CHECK-NEXT: mov x9, sp
106 ; CHECK-NEXT: subs x9, x9, x10
107 ; CHECK-NEXT: mov sp, x9
108 ; CHECK-NEXT: str x9, [x19, #112] // 8-byte Folded Spill
109 ; CHECK-NEXT: mov x9, sp
110 ; CHECK-NEXT: subs x9, x9, x10
111 ; CHECK-NEXT: mov sp, x9
112 ; CHECK-NEXT: str x9, [x19, #120] // 8-byte Folded Spill
113 ; CHECK-NEXT: mov x9, sp
114 ; CHECK-NEXT: subs x9, x9, x14
115 ; CHECK-NEXT: mov sp, x9
116 ; CHECK-NEXT: str x9, [x19, #128] // 8-byte Folded Spill
117 ; CHECK-NEXT: mov x9, sp
118 ; CHECK-NEXT: subs x9, x9, x14
119 ; CHECK-NEXT: mov sp, x9
120 ; CHECK-NEXT: str x9, [x19, #136] // 8-byte Folded Spill
121 ; CHECK-NEXT: mov x9, sp
122 ; CHECK-NEXT: subs x9, x9, x10
123 ; CHECK-NEXT: mov sp, x9
124 ; CHECK-NEXT: str x9, [x19, #144] // 8-byte Folded Spill
125 ; CHECK-NEXT: mov x9, sp
126 ; CHECK-NEXT: subs x9, x9, x10
127 ; CHECK-NEXT: mov sp, x9
128 ; CHECK-NEXT: str x9, [x19, #152] // 8-byte Folded Spill
129 ; CHECK-NEXT: mov x9, sp
130 ; CHECK-NEXT: subs x9, x9, #16
131 ; CHECK-NEXT: mov sp, x9
132 ; CHECK-NEXT: mov x9, sp
133 ; CHECK-NEXT: subs x9, x9, #16
134 ; CHECK-NEXT: mov sp, x9
135 ; CHECK-NEXT: mov x9, sp
136 ; CHECK-NEXT: subs x9, x9, #16
137 ; CHECK-NEXT: mov sp, x9
138 ; CHECK-NEXT: mov x9, sp
139 ; CHECK-NEXT: subs x9, x9, #16
140 ; CHECK-NEXT: mov sp, x9
141 ; CHECK-NEXT: mov x9, sp
142 ; CHECK-NEXT: subs x9, x9, #16
143 ; CHECK-NEXT: mov sp, x9
144 ; CHECK-NEXT: mov x9, sp
145 ; CHECK-NEXT: subs x9, x9, x14
146 ; CHECK-NEXT: mov sp, x9
147 ; CHECK-NEXT: mov x9, sp
148 ; CHECK-NEXT: subs x9, x9, #16
149 ; CHECK-NEXT: mov sp, x9
150 ; CHECK-NEXT: addvl x9, x8, #2
151 ; CHECK-NEXT: mov w0, w9
152 ; CHECK-NEXT: // implicit-def: $x9
153 ; CHECK-NEXT: mov w9, w0
154 ; CHECK-NEXT: and x9, x9, #0x7f0
155 ; CHECK-NEXT: mov x10, sp
156 ; CHECK-NEXT: subs x10, x10, x9
157 ; CHECK-NEXT: and x10, x10, #0xffffffffffffffe0
158 ; CHECK-NEXT: mov sp, x10
159 ; CHECK-NEXT: mov x2, sp
160 ; CHECK-NEXT: subs x10, x2, #16
161 ; CHECK-NEXT: mov sp, x10
162 ; CHECK-NEXT: str x10, [x19, #160] // 8-byte Folded Spill
163 ; CHECK-NEXT: mov x10, sp
164 ; CHECK-NEXT: subs x11, x10, x14
165 ; CHECK-NEXT: mov sp, x11
166 ; CHECK-NEXT: mov x10, x11
167 ; CHECK-NEXT: str x10, [x19, #168] // 8-byte Folded Spill
168 ; CHECK-NEXT: mov x0, sp
169 ; CHECK-NEXT: subs x10, x0, #16
170 ; CHECK-NEXT: mov sp, x10
171 ; CHECK-NEXT: str x10, [x19, #176] // 8-byte Folded Spill
172 ; CHECK-NEXT: mov x17, sp
173 ; CHECK-NEXT: subs x10, x17, #16
174 ; CHECK-NEXT: mov sp, x10
175 ; CHECK-NEXT: str x10, [x19, #184] // 8-byte Folded Spill
176 ; CHECK-NEXT: mov x10, sp
177 ; CHECK-NEXT: subs x10, x10, x14
178 ; CHECK-NEXT: str x10, [x19, #360] // 8-byte Folded Spill
179 ; CHECK-NEXT: mov sp, x10
180 ; CHECK-NEXT: str x10, [x19, #192] // 8-byte Folded Spill
181 ; CHECK-NEXT: mov x15, sp
182 ; CHECK-NEXT: subs x10, x15, #16
183 ; CHECK-NEXT: mov sp, x10
184 ; CHECK-NEXT: str x10, [x19, #200] // 8-byte Folded Spill
185 ; CHECK-NEXT: mov x13, sp
186 ; CHECK-NEXT: subs x10, x13, #16
187 ; CHECK-NEXT: mov sp, x10
188 ; CHECK-NEXT: str x10, [x19, #208] // 8-byte Folded Spill
189 ; CHECK-NEXT: incw x8
190 ; CHECK-NEXT: mov w1, w8
191 ; CHECK-NEXT: // implicit-def: $x8
192 ; CHECK-NEXT: mov w8, w1
193 ; CHECK-NEXT: and x12, x8, #0xf0
194 ; CHECK-NEXT: mov x8, sp
195 ; CHECK-NEXT: subs x10, x8, x12
196 ; CHECK-NEXT: mov sp, x10
197 ; CHECK-NEXT: mov x8, x10
198 ; CHECK-NEXT: str x8, [x19, #216] // 8-byte Folded Spill
199 ; CHECK-NEXT: mov x8, sp
200 ; CHECK-NEXT: subs x8, x8, x12
201 ; CHECK-NEXT: str x8, [x19, #368] // 8-byte Folded Spill
202 ; CHECK-NEXT: mov sp, x8
203 ; CHECK-NEXT: str x8, [x19, #224] // 8-byte Folded Spill
204 ; CHECK-NEXT: mov x8, sp
205 ; CHECK-NEXT: subs x8, x8, x9
206 ; CHECK-NEXT: and x8, x8, #0xffffffffffffffe0
207 ; CHECK-NEXT: mov sp, x8
208 ; CHECK-NEXT: str x8, [x19, #232] // 8-byte Folded Spill
209 ; CHECK-NEXT: mov x8, sp
210 ; CHECK-NEXT: subs x8, x8, x9
211 ; CHECK-NEXT: and x8, x8, #0xffffffffffffffe0
212 ; CHECK-NEXT: mov sp, x8
213 ; CHECK-NEXT: str x8, [x19, #240] // 8-byte Folded Spill
214 ; CHECK-NEXT: mov x8, sp
215 ; CHECK-NEXT: str x8, [x19, #336] // 8-byte Folded Spill
216 ; CHECK-NEXT: subs x8, x8, #16
217 ; CHECK-NEXT: mov sp, x8
218 ; CHECK-NEXT: mov x8, sp
219 ; CHECK-NEXT: str x8, [x19, #344] // 8-byte Folded Spill
220 ; CHECK-NEXT: subs x8, x8, #16
221 ; CHECK-NEXT: mov sp, x8
222 ; CHECK-NEXT: mov x24, sp
223 ; CHECK-NEXT: subs x8, x24, #16
224 ; CHECK-NEXT: mov sp, x8
225 ; CHECK-NEXT: mov x7, sp
226 ; CHECK-NEXT: subs x8, x7, #16
227 ; CHECK-NEXT: mov sp, x8
228 ; CHECK-NEXT: mov x27, sp
229 ; CHECK-NEXT: subs x8, x27, #16
230 ; CHECK-NEXT: mov sp, x8
231 ; CHECK-NEXT: mov x26, sp
232 ; CHECK-NEXT: subs x8, x26, #16
233 ; CHECK-NEXT: mov sp, x8
234 ; CHECK-NEXT: mov x1, sp
235 ; CHECK-NEXT: subs x8, x1, #16
236 ; CHECK-NEXT: mov sp, x8
237 ; CHECK-NEXT: mov x9, sp
238 ; CHECK-NEXT: subs x8, x9, #16
239 ; CHECK-NEXT: mov sp, x8
240 ; CHECK-NEXT: mov x20, sp
241 ; CHECK-NEXT: subs x8, x20, #16
242 ; CHECK-NEXT: mov sp, x8
243 ; CHECK-NEXT: mov x16, sp
244 ; CHECK-NEXT: subs x8, x16, #16
245 ; CHECK-NEXT: mov sp, x8
246 ; CHECK-NEXT: mov x8, sp
247 ; CHECK-NEXT: str x8, [x19, #248] // 8-byte Folded Spill
248 ; CHECK-NEXT: subs x8, x8, #16
249 ; CHECK-NEXT: mov sp, x8
250 ; CHECK-NEXT: mov x5, sp
251 ; CHECK-NEXT: subs x8, x5, #16
252 ; CHECK-NEXT: mov sp, x8
253 ; CHECK-NEXT: mov x12, sp
254 ; CHECK-NEXT: subs x8, x12, #16
255 ; CHECK-NEXT: mov sp, x8
256 ; CHECK-NEXT: mov x22, sp
257 ; CHECK-NEXT: subs x8, x22, #16
258 ; CHECK-NEXT: mov sp, x8
259 ; CHECK-NEXT: mov x25, sp
260 ; CHECK-NEXT: subs x8, x25, #16
261 ; CHECK-NEXT: mov sp, x8
262 ; CHECK-NEXT: mov x30, sp
263 ; CHECK-NEXT: subs x8, x30, #16
264 ; CHECK-NEXT: mov sp, x8
265 ; CHECK-NEXT: mov x8, sp
266 ; CHECK-NEXT: str x8, [x19, #296] // 8-byte Folded Spill
267 ; CHECK-NEXT: subs x8, x8, #16
268 ; CHECK-NEXT: mov sp, x8
269 ; CHECK-NEXT: mov x8, sp
270 ; CHECK-NEXT: str x8, [x19, #328] // 8-byte Folded Spill
271 ; CHECK-NEXT: subs x8, x8, #16
272 ; CHECK-NEXT: mov sp, x8
273 ; CHECK-NEXT: mov x8, sp
274 ; CHECK-NEXT: str x8, [x19, #264] // 8-byte Folded Spill
275 ; CHECK-NEXT: subs x8, x8, #16
276 ; CHECK-NEXT: mov sp, x8
277 ; CHECK-NEXT: mov x8, sp
278 ; CHECK-NEXT: str x8, [x19, #256] // 8-byte Folded Spill
279 ; CHECK-NEXT: subs x8, x8, #16
280 ; CHECK-NEXT: mov sp, x8
281 ; CHECK-NEXT: mov x8, sp
282 ; CHECK-NEXT: str x8, [x19, #272] // 8-byte Folded Spill
283 ; CHECK-NEXT: subs x8, x8, #16
284 ; CHECK-NEXT: mov sp, x8
285 ; CHECK-NEXT: mov x8, sp
286 ; CHECK-NEXT: str x8, [x19, #312] // 8-byte Folded Spill
287 ; CHECK-NEXT: subs x8, x8, #16
288 ; CHECK-NEXT: mov sp, x8
289 ; CHECK-NEXT: mov x8, sp
290 ; CHECK-NEXT: str x8, [x19, #280] // 8-byte Folded Spill
291 ; CHECK-NEXT: subs x8, x8, #16
292 ; CHECK-NEXT: mov sp, x8
293 ; CHECK-NEXT: mov x8, sp
294 ; CHECK-NEXT: str x8, [x19, #304] // 8-byte Folded Spill
295 ; CHECK-NEXT: subs x8, x8, #16
296 ; CHECK-NEXT: mov sp, x8
297 ; CHECK-NEXT: mov x6, sp
298 ; CHECK-NEXT: subs x8, x6, #16
299 ; CHECK-NEXT: mov sp, x8
300 ; CHECK-NEXT: mov x21, sp
301 ; CHECK-NEXT: subs x8, x21, #16
302 ; CHECK-NEXT: mov sp, x8
303 ; CHECK-NEXT: mov x8, sp
304 ; CHECK-NEXT: str x8, [x19, #352] // 8-byte Folded Spill
305 ; CHECK-NEXT: subs x8, x8, #16
306 ; CHECK-NEXT: mov sp, x8
307 ; CHECK-NEXT: mov x28, sp
308 ; CHECK-NEXT: subs x8, x28, #16
309 ; CHECK-NEXT: mov sp, x8
310 ; CHECK-NEXT: mov x8, sp
311 ; CHECK-NEXT: subs x4, x8, x14
312 ; CHECK-NEXT: mov sp, x4
313 ; CHECK-NEXT: mov x8, sp
314 ; CHECK-NEXT: subs x3, x8, x14
315 ; CHECK-NEXT: mov sp, x3
316 ; CHECK-NEXT: mov x23, sp
317 ; CHECK-NEXT: subs x8, x23, #16
318 ; CHECK-NEXT: mov sp, x8
319 ; CHECK-NEXT: mov x18, sp
320 ; CHECK-NEXT: subs x8, x18, #16
321 ; CHECK-NEXT: mov sp, x8
322 ; CHECK-NEXT: mov x14, sp
323 ; CHECK-NEXT: subs x8, x14, #16
324 ; CHECK-NEXT: mov sp, x8
325 ; CHECK-NEXT: mov w8, wzr
326 ; CHECK-NEXT: sturb w8, [x9, #-16]
327 ; CHECK-NEXT: ldr x9, [x19, #248] // 8-byte Folded Reload
328 ; CHECK-NEXT: sturb w8, [x9, #-16]
329 ; CHECK-NEXT: ldr x9, [x19, #296] // 8-byte Folded Reload
330 ; CHECK-NEXT: sturb w8, [x30, #-16]
331 ; CHECK-NEXT: mov x8, xzr
332 ; CHECK-NEXT: str x8, [x19, #376] // 8-byte Folded Spill
333 ; CHECK-NEXT: stur x8, [x9, #-16]
334 ; CHECK-NEXT: ldur x8, [x20, #-16]
335 ; CHECK-NEXT: ldur x9, [x27, #-16]
336 ; CHECK-NEXT: add x30, x8, x9, lsl #2
337 ; CHECK-NEXT: ldur x8, [x1, #-16]
338 ; CHECK-NEXT: subs x8, x8, #1
339 ; CHECK-NEXT: ldur x9, [x16, #-16]
340 ; CHECK-NEXT: mul x8, x8, x9
341 ; CHECK-NEXT: ldr x9, [x19, #328] // 8-byte Folded Reload
342 ; CHECK-NEXT: add x30, x30, x8, lsl #2
343 ; CHECK-NEXT: ldr x8, [x19, #296] // 8-byte Folded Reload
344 ; CHECK-NEXT: stur x30, [x8, #-16]
345 ; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
346 ; CHECK-NEXT: stur x8, [x9, #-16]
347 ; CHECK-NEXT: ldur x8, [x5, #-16]
348 ; CHECK-NEXT: ldur x9, [x26, #-16]
349 ; CHECK-NEXT: add x30, x8, x9, lsl #2
350 ; CHECK-NEXT: ldur x8, [x1, #-16]
351 ; CHECK-NEXT: subs x8, x8, #1
352 ; CHECK-NEXT: ldur x9, [x12, #-16]
353 ; CHECK-NEXT: mul x8, x8, x9
354 ; CHECK-NEXT: ldr x9, [x19, #264] // 8-byte Folded Reload
355 ; CHECK-NEXT: add x30, x30, x8, lsl #2
356 ; CHECK-NEXT: ldr x8, [x19, #328] // 8-byte Folded Reload
357 ; CHECK-NEXT: stur x30, [x8, #-16]
358 ; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
359 ; CHECK-NEXT: stur x8, [x9, #-16]
360 ; CHECK-NEXT: ldur x8, [x22, #-16]
361 ; CHECK-NEXT: ldur x9, [x27, #-16]
362 ; CHECK-NEXT: add x30, x8, x9, lsl #2
363 ; CHECK-NEXT: ldur x8, [x26, #-16]
364 ; CHECK-NEXT: subs x8, x8, #1
365 ; CHECK-NEXT: ldur x9, [x25, #-16]
366 ; CHECK-NEXT: mul x8, x8, x9
367 ; CHECK-NEXT: ldr x9, [x19, #256] // 8-byte Folded Reload
368 ; CHECK-NEXT: add x30, x30, x8, lsl #2
369 ; CHECK-NEXT: ldr x8, [x19, #264] // 8-byte Folded Reload
370 ; CHECK-NEXT: stur x30, [x8, #-16]
371 ; CHECK-NEXT: ldr x8, [x19, #272] // 8-byte Folded Reload
372 ; CHECK-NEXT: mov w30, #32 // =0x20
373 ; CHECK-NEXT: // kill: def $lr killed $w30
374 ; CHECK-NEXT: stur x30, [x9, #-16]
375 ; CHECK-NEXT: ldr x9, [x19, #312] // 8-byte Folded Reload
376 ; CHECK-NEXT: stur x30, [x8, #-16]
377 ; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
378 ; CHECK-NEXT: stur x8, [x9, #-16]
379 ; CHECK-NEXT: ldur x8, [x1, #-16]
380 ; CHECK-NEXT: lsl x8, x8, #5
381 ; CHECK-NEXT: stur x8, [x9, #-16]
382 ; CHECK-NEXT: ldr x9, [x19, #280] // 8-byte Folded Reload
383 ; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
384 ; CHECK-NEXT: stur x30, [x16, #-16]
385 ; CHECK-NEXT: stur x8, [x9, #-16]
386 ; CHECK-NEXT: ldur x8, [x27, #-16]
387 ; CHECK-NEXT: subs x8, x8, #1
388 ; CHECK-NEXT: lsr x8, x8, #5
389 ; CHECK-NEXT: add x8, x8, #1
390 ; CHECK-NEXT: stur x8, [x9, #-16]
391 ; CHECK-NEXT: ldur x8, [x20, #-16]
392 ; CHECK-NEXT: str x8, [x19, #288] // 8-byte Folded Spill
393 ; CHECK-NEXT: ldr x8, [x19, #312] // 8-byte Folded Reload
394 ; CHECK-NEXT: ldur x9, [x9, #-16]
395 ; CHECK-NEXT: ldur x8, [x8, #-16]
396 ; CHECK-NEXT: mul x9, x9, x8
397 ; CHECK-NEXT: ldr x8, [x19, #288] // 8-byte Folded Reload
398 ; CHECK-NEXT: add x8, x8, x9, lsl #2
399 ; CHECK-NEXT: ldr x9, [x19, #296] // 8-byte Folded Reload
400 ; CHECK-NEXT: stur x8, [x9, #-16]
401 ; CHECK-NEXT: ldr x9, [x19, #304] // 8-byte Folded Reload
402 ; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
403 ; CHECK-NEXT: stur x30, [x12, #-16]
404 ; CHECK-NEXT: stur x8, [x9, #-16]
405 ; CHECK-NEXT: ldur x8, [x26, #-16]
406 ; CHECK-NEXT: subs x8, x8, #1
407 ; CHECK-NEXT: lsr x8, x8, #5
408 ; CHECK-NEXT: add x8, x8, #1
409 ; CHECK-NEXT: stur x8, [x9, #-16]
410 ; CHECK-NEXT: ldur x8, [x5, #-16]
411 ; CHECK-NEXT: str x8, [x19, #320] // 8-byte Folded Spill
412 ; CHECK-NEXT: ldr x8, [x19, #312] // 8-byte Folded Reload
413 ; CHECK-NEXT: ldur x9, [x9, #-16]
414 ; CHECK-NEXT: ldur x8, [x8, #-16]
415 ; CHECK-NEXT: mul x9, x9, x8
416 ; CHECK-NEXT: ldr x8, [x19, #320] // 8-byte Folded Reload
417 ; CHECK-NEXT: add x8, x8, x9, lsl #2
418 ; CHECK-NEXT: ldr x9, [x19, #328] // 8-byte Folded Reload
419 ; CHECK-NEXT: stur x8, [x9, #-16]
420 ; CHECK-NEXT: ldr x9, [x19, #352] // 8-byte Folded Reload
421 ; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
422 ; CHECK-NEXT: stur x8, [x6, #-16]
423 ; CHECK-NEXT: stur x8, [x6, #-16]
424 ; CHECK-NEXT: stur x8, [x21, #-16]
425 ; CHECK-NEXT: stur x8, [x21, #-16]
426 ; CHECK-NEXT: stur x8, [x9, #-16]
427 ; CHECK-NEXT: ldur x8, [x27, #-16]
428 ; CHECK-NEXT: ldur x9, [x21, #-16]
429 ; CHECK-NEXT: subs x8, x8, x9
430 ; CHECK-NEXT: ldr x9, [x19, #336] // 8-byte Folded Reload
431 ; CHECK-NEXT: stur x8, [x9, #-16]
432 ; CHECK-NEXT: ldr x8, [x19, #344] // 8-byte Folded Reload
433 ; CHECK-NEXT: stur x30, [x8, #-16]
434 ; CHECK-NEXT: ldr x8, [x19, #352] // 8-byte Folded Reload
435 ; CHECK-NEXT: ldur x9, [x9, #-16]
436 ; CHECK-NEXT: stur x9, [x8, #-16]
437 ; CHECK-NEXT: ldr x8, [x19, #376] // 8-byte Folded Reload
438 ; CHECK-NEXT: stur x8, [x28, #-16]
439 ; CHECK-NEXT: ldur x8, [x26, #-16]
440 ; CHECK-NEXT: ldur x9, [x6, #-16]
441 ; CHECK-NEXT: subs x8, x8, x9
442 ; CHECK-NEXT: ldr x9, [x19, #360] // 8-byte Folded Reload
443 ; CHECK-NEXT: stur x8, [x24, #-16]
444 ; CHECK-NEXT: ldr x8, [x19, #368] // 8-byte Folded Reload
445 ; CHECK-NEXT: stur x30, [x7, #-16]
446 ; CHECK-NEXT: ldr x7, [x19, #376] // 8-byte Folded Reload
447 ; CHECK-NEXT: ldur x24, [x24, #-16]
448 ; CHECK-NEXT: stur x24, [x28, #-16]
449 ; CHECK-NEXT: ldur x24, [x21, #-16]
450 ; CHECK-NEXT: ldur x27, [x27, #-16]
451 ; CHECK-NEXT: whilelt pn8.s, x24, x27, vlx2
452 ; CHECK-NEXT: str pn8, [x4]
453 ; CHECK-NEXT: ldur x24, [x6, #-16]
454 ; CHECK-NEXT: ldur x26, [x26, #-16]
455 ; CHECK-NEXT: whilelt pn8.s, x24, x26, vlx2
456 ; CHECK-NEXT: str pn8, [x3]
457 ; CHECK-NEXT: stur x7, [x23, #-16]
458 ; CHECK-NEXT: ldur x22, [x22, #-16]
459 ; CHECK-NEXT: ldur x24, [x21, #-16]
460 ; CHECK-NEXT: add x22, x22, x24, lsl #2
461 ; CHECK-NEXT: ldur x24, [x6, #-16]
462 ; CHECK-NEXT: ldur x25, [x25, #-16]
463 ; CHECK-NEXT: mul x24, x24, x25
464 ; CHECK-NEXT: add x22, x22, x24, lsl #2
465 ; CHECK-NEXT: stur x22, [x23, #-16]
466 ; CHECK-NEXT: zero {za}
467 ; CHECK-NEXT: stur x7, [x18, #-16]
468 ; CHECK-NEXT: ldur x20, [x20, #-16]
469 ; CHECK-NEXT: ldur x21, [x21, #-16]
470 ; CHECK-NEXT: ldur x22, [x1, #-16]
471 ; CHECK-NEXT: mul x21, x21, x22
472 ; CHECK-NEXT: add x20, x20, x21, lsl #2
473 ; CHECK-NEXT: stur x20, [x18, #-16]
474 ; CHECK-NEXT: stur x7, [x14, #-16]
475 ; CHECK-NEXT: ldur x5, [x5, #-16]
476 ; CHECK-NEXT: ldur x6, [x6, #-16]
477 ; CHECK-NEXT: ldur x7, [x1, #-16]
478 ; CHECK-NEXT: mul x6, x6, x7
479 ; CHECK-NEXT: add x5, x5, x6, lsl #2
480 ; CHECK-NEXT: stur x5, [x14, #-16]
481 ; CHECK-NEXT: ldur x1, [x1, #-16]
482 ; CHECK-NEXT: ldr p1, [x4]
483 ; CHECK-NEXT: ldur x18, [x18, #-16]
484 ; CHECK-NEXT: ldur x16, [x16, #-16]
485 ; CHECK-NEXT: lsr x16, x16, #2
486 ; CHECK-NEXT: ldr p0, [x3]
487 ; CHECK-NEXT: ldur x14, [x14, #-16]
488 ; CHECK-NEXT: ldur x12, [x12, #-16]
489 ; CHECK-NEXT: lsr x12, x12, #2
490 ; CHECK-NEXT: stur x1, [x2, #-16]
491 ; CHECK-NEXT: str p1, [x11]
492 ; CHECK-NEXT: stur x18, [x0, #-16]
493 ; CHECK-NEXT: stur x16, [x17, #-16]
494 ; CHECK-NEXT: str p0, [x9]
495 ; CHECK-NEXT: stur x14, [x15, #-16]
496 ; CHECK-NEXT: stur x12, [x13, #-16]
497 ; CHECK-NEXT: ldr p0, [x11]
498 ; CHECK-NEXT: mov p8.b, p0.b
499 ; CHECK-NEXT: pext { p3.s, p4.s }, pn8[0]
500 ; CHECK-NEXT: mov p0.b, p3.b
501 ; CHECK-NEXT: ptrue p2.s
502 ; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
503 ; CHECK-NEXT: mov p1.b, p4.b
504 ; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
505 ; CHECK-NEXT: mov x11, x10
506 ; CHECK-NEXT: incd x11
507 ; CHECK-NEXT: str p1, [x11]
508 ; CHECK-NEXT: str p0, [x10]
509 ; CHECK-NEXT: ldr p0, [x9]
510 ; CHECK-NEXT: mov p8.b, p0.b
511 ; CHECK-NEXT: pext { p3.s, p4.s }, pn8[0]
512 ; CHECK-NEXT: mov p0.b, p3.b
513 ; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
514 ; CHECK-NEXT: mov p1.b, p4.b
515 ; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
516 ; CHECK-NEXT: mov x9, x8
517 ; CHECK-NEXT: incd x9
518 ; CHECK-NEXT: str p1, [x9]
519 ; CHECK-NEXT: str p0, [x8]
520 ; CHECK-NEXT: b .LBB0_3
521 ; CHECK-NEXT: .LBB0_3: // %bb178
522 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
523 ; CHECK-NEXT: ldr x9, [x19, #160] // 8-byte Folded Reload
524 ; CHECK-NEXT: ldr x8, [x19, #56] // 8-byte Folded Reload
525 ; CHECK-NEXT: ldr x10, [x19, #48] // 8-byte Folded Reload
526 ; CHECK-NEXT: ldr x11, [x19, #32] // 8-byte Folded Reload
527 ; CHECK-NEXT: ldr x12, [x19, #24] // 8-byte Folded Reload
528 ; CHECK-NEXT: ldr x13, [x19, #240] // 8-byte Folded Reload
529 ; CHECK-NEXT: ldr x14, [x19, #232] // 8-byte Folded Reload
530 ; CHECK-NEXT: ldr x17, [x19, #88] // 8-byte Folded Reload
531 ; CHECK-NEXT: ldr x18, [x19, #80] // 8-byte Folded Reload
532 ; CHECK-NEXT: ldr x0, [x19, #72] // 8-byte Folded Reload
533 ; CHECK-NEXT: ldr x1, [x19, #64] // 8-byte Folded Reload
534 ; CHECK-NEXT: ldr x2, [x19, #216] // 8-byte Folded Reload
535 ; CHECK-NEXT: ldr x3, [x19, #120] // 8-byte Folded Reload
536 ; CHECK-NEXT: ldr x4, [x19, #112] // 8-byte Folded Reload
537 ; CHECK-NEXT: ldr x5, [x19, #104] // 8-byte Folded Reload
538 ; CHECK-NEXT: ldr x6, [x19, #96] // 8-byte Folded Reload
539 ; CHECK-NEXT: ldr x7, [x19, #224] // 8-byte Folded Reload
540 ; CHECK-NEXT: ldr x20, [x19, #152] // 8-byte Folded Reload
541 ; CHECK-NEXT: ldr x21, [x19, #144] // 8-byte Folded Reload
542 ; CHECK-NEXT: ldr x22, [x19, #136] // 8-byte Folded Reload
543 ; CHECK-NEXT: ldr x23, [x19, #128] // 8-byte Folded Reload
544 ; CHECK-NEXT: ldr x16, [x19, #200] // 8-byte Folded Reload
545 ; CHECK-NEXT: ldr x15, [x19, #208] // 8-byte Folded Reload
546 ; CHECK-NEXT: ldr x24, [x19, #192] // 8-byte Folded Reload
547 ; CHECK-NEXT: ldr x26, [x19, #176] // 8-byte Folded Reload
548 ; CHECK-NEXT: ldr x25, [x19, #184] // 8-byte Folded Reload
549 ; CHECK-NEXT: ldr x27, [x19, #168] // 8-byte Folded Reload
550 ; CHECK-NEXT: ldr p0, [x27]
551 ; CHECK-NEXT: ldr x27, [x26]
552 ; CHECK-NEXT: mov p8.b, p0.b
553 ; CHECK-NEXT: ld1w { z16.s, z24.s }, pn8/z, [x27]
554 ; CHECK-NEXT: mov z0.d, z16.d
555 ; CHECK-NEXT: mov z1.d, z24.d
556 ; CHECK-NEXT: ptrue p2.s
557 ; CHECK-NEXT: str p2, [x29, #-1, mul vl] // 2-byte Folded Spill
558 ; CHECK-NEXT: st1w { z1.s }, p2, [x14, #1, mul vl]
559 ; CHECK-NEXT: st1w { z0.s }, p2, [x14]
560 ; CHECK-NEXT: ldr x27, [x25]
561 ; CHECK-NEXT: ldr x25, [x26]
562 ; CHECK-NEXT: add x25, x25, x27, lsl #2
563 ; CHECK-NEXT: str x25, [x26]
564 ; CHECK-NEXT: ldr p0, [x24]
565 ; CHECK-NEXT: ldr x24, [x16]
566 ; CHECK-NEXT: mov p8.b, p0.b
567 ; CHECK-NEXT: ld1w { z16.s, z24.s }, pn8/z, [x24]
568 ; CHECK-NEXT: mov z0.d, z16.d
569 ; CHECK-NEXT: mov z1.d, z24.d
570 ; CHECK-NEXT: st1w { z1.s }, p2, [x13, #1, mul vl]
571 ; CHECK-NEXT: st1w { z0.s }, p2, [x13]
572 ; CHECK-NEXT: ldr x24, [x15]
573 ; CHECK-NEXT: ldr x15, [x16]
574 ; CHECK-NEXT: add x15, x15, x24, lsl #2
575 ; CHECK-NEXT: str x15, [x16]
576 ; CHECK-NEXT: mov x16, x2
577 ; CHECK-NEXT: incd x16
578 ; CHECK-NEXT: ldr p1, [x2]
579 ; CHECK-NEXT: mov x15, x7
580 ; CHECK-NEXT: incd x15
581 ; CHECK-NEXT: ldr p0, [x7]
582 ; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14]
583 ; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13]
584 ; CHECK-NEXT: str p1, [x23]
585 ; CHECK-NEXT: str p0, [x22]
586 ; CHECK-NEXT: st1w { z1.s }, p2, [x21]
587 ; CHECK-NEXT: st1w { z0.s }, p2, [x20]
588 ; CHECK-NEXT: ldr p0, [x23]
589 ; CHECK-NEXT: ldr p1, [x22]
590 ; CHECK-NEXT: ld1w { z0.s }, p2/z, [x21]
591 ; CHECK-NEXT: ld1w { z1.s }, p2/z, [x20]
592 ; CHECK-NEXT: fmopa za0.s, p0/m, p1/m, z0.s, z1.s
593 ; CHECK-NEXT: ldr p1, [x16]
594 ; CHECK-NEXT: ldr p0, [x7]
595 ; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14, #1, mul vl]
596 ; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13]
597 ; CHECK-NEXT: str p1, [x6]
598 ; CHECK-NEXT: str p0, [x5]
599 ; CHECK-NEXT: st1w { z1.s }, p2, [x4]
600 ; CHECK-NEXT: st1w { z0.s }, p2, [x3]
601 ; CHECK-NEXT: ldr p0, [x6]
602 ; CHECK-NEXT: ldr p1, [x5]
603 ; CHECK-NEXT: ld1w { z0.s }, p2/z, [x4]
604 ; CHECK-NEXT: ld1w { z1.s }, p2/z, [x3]
605 ; CHECK-NEXT: fmopa za1.s, p0/m, p1/m, z0.s, z1.s
606 ; CHECK-NEXT: ldr p1, [x2]
607 ; CHECK-NEXT: ldr p0, [x15]
608 ; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14]
609 ; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13, #1, mul vl]
610 ; CHECK-NEXT: str p1, [x1]
611 ; CHECK-NEXT: str p0, [x0]
612 ; CHECK-NEXT: st1w { z1.s }, p2, [x18]
613 ; CHECK-NEXT: st1w { z0.s }, p2, [x17]
614 ; CHECK-NEXT: ldr p0, [x1]
615 ; CHECK-NEXT: ldr p1, [x0]
616 ; CHECK-NEXT: ld1w { z0.s }, p2/z, [x18]
617 ; CHECK-NEXT: ld1w { z1.s }, p2/z, [x17]
618 ; CHECK-NEXT: fmopa za2.s, p0/m, p1/m, z0.s, z1.s
619 ; CHECK-NEXT: ldr p1, [x16]
620 ; CHECK-NEXT: ldr p0, [x15]
621 ; CHECK-NEXT: ld1w { z1.s }, p2/z, [x14, #1, mul vl]
622 ; CHECK-NEXT: ld1w { z0.s }, p2/z, [x13, #1, mul vl]
623 ; CHECK-NEXT: str p1, [x12]
624 ; CHECK-NEXT: str p0, [x11]
625 ; CHECK-NEXT: st1w { z1.s }, p2, [x10]
626 ; CHECK-NEXT: st1w { z0.s }, p2, [x8]
627 ; CHECK-NEXT: ldr p0, [x12]
628 ; CHECK-NEXT: ldr p1, [x11]
629 ; CHECK-NEXT: ld1w { z0.s }, p2/z, [x10]
630 ; CHECK-NEXT: ld1w { z1.s }, p2/z, [x8]
631 ; CHECK-NEXT: fmopa za3.s, p0/m, p1/m, z0.s, z1.s
632 ; CHECK-NEXT: ldr x8, [x9]
633 ; CHECK-NEXT: subs x8, x8, #1
634 ; CHECK-NEXT: str x8, [x9]
635 ; CHECK-NEXT: b .LBB0_3
637 %alloca = alloca <vscale x 16 x i1>, align 2
638 %alloca1 = alloca <vscale x 16 x i1>, align 2
639 %alloca2 = alloca <vscale x 4 x float>, align 16
640 %alloca3 = alloca <vscale x 4 x float>, align 16
641 %alloca4 = alloca <vscale x 16 x i1>, align 2
642 %alloca5 = alloca <vscale x 16 x i1>, align 2
643 %alloca6 = alloca <vscale x 4 x float>, align 16
644 %alloca7 = alloca <vscale x 4 x float>, align 16
645 %alloca8 = alloca <vscale x 16 x i1>, align 2
646 %alloca9 = alloca <vscale x 16 x i1>, align 2
647 %alloca10 = alloca <vscale x 4 x float>, align 16
648 %alloca11 = alloca <vscale x 4 x float>, align 16
649 %alloca12 = alloca <vscale x 16 x i1>, align 2
650 %alloca13 = alloca <vscale x 16 x i1>, align 2
651 %alloca14 = alloca <vscale x 4 x float>, align 16
652 %alloca15 = alloca <vscale x 4 x float>, align 16
653 %alloca16 = alloca i64, align 8
654 %alloca17 = alloca i64, align 8
655 %alloca18 = alloca ptr, align 8
656 %alloca19 = alloca i64, align 8
657 %alloca20 = alloca i64, align 8
658 %alloca21 = alloca target("aarch64.svcount"), align 2
659 %alloca22 = alloca i32, align 4
660 %alloca23 = alloca <vscale x 32 x i8>, align 16
661 %alloca24 = alloca i64, align 8
662 %alloca25 = alloca target("aarch64.svcount"), align 2
663 %alloca26 = alloca ptr, align 8
664 %alloca27 = alloca i64, align 8
665 %alloca28 = alloca target("aarch64.svcount"), align 2
666 %alloca29 = alloca ptr, align 8
667 %alloca30 = alloca i64, align 8
668 %alloca31 = alloca <vscale x 32 x i1>, align 2
669 %alloca32 = alloca <vscale x 32 x i1>, align 2
670 %alloca33 = alloca <vscale x 8 x float>, align 16
671 %alloca34 = alloca <vscale x 8 x float>, align 16
672 %alloca35 = alloca i64, align 8
673 %alloca36 = alloca i64, align 8
674 %alloca37 = alloca i64, align 8
675 %alloca38 = alloca i64, align 8
676 %alloca39 = alloca i64, align 8
677 %alloca40 = alloca i64, align 8
678 %alloca41 = alloca i64, align 8
679 %alloca42 = alloca i8, align 1
680 %alloca43 = alloca ptr, align 8
681 %alloca44 = alloca i64, align 8
682 %alloca45 = alloca i8, align 1
683 %alloca46 = alloca ptr, align 8
684 %alloca47 = alloca i64, align 8
685 %alloca48 = alloca ptr, align 8
686 %alloca49 = alloca i64, align 8
687 %alloca50 = alloca i8, align 1
688 %alloca51 = alloca ptr, align 8
689 %alloca52 = alloca ptr, align 8
690 %alloca53 = alloca ptr, align 8
691 %alloca54 = alloca i64, align 8
692 %alloca55 = alloca i64, align 8
693 %alloca56 = alloca i64, align 8
694 %alloca57 = alloca i64, align 8
695 %alloca58 = alloca i64, align 8
696 %alloca59 = alloca i64, align 8
697 %alloca60 = alloca i64, align 8
698 %alloca61 = alloca i64, align 8
699 %alloca62 = alloca i64, align 8
700 %alloca63 = alloca target("aarch64.svcount"), align 2
701 %alloca64 = alloca target("aarch64.svcount"), align 2
702 %alloca65 = alloca ptr, align 8
703 %alloca66 = alloca ptr, align 8
704 %alloca67 = alloca ptr, align 8
705 store i8 0, ptr %alloca42, align 1
706 store i8 0, ptr %alloca45, align 1
707 store i8 0, ptr %alloca50, align 1
708 store ptr null, ptr %alloca51, align 8
709 %load = load ptr, ptr %alloca43, align 8
710 %load68 = load i64, ptr %alloca39, align 8
711 %getelementptr = getelementptr inbounds float, ptr %load, i64 %load68
712 %load69 = load i64, ptr %alloca41, align 8
713 %sub = sub i64 %load69, 1
714 %load70 = load i64, ptr %alloca44, align 8
715 %mul = mul i64 %sub, %load70
716 %getelementptr71 = getelementptr inbounds float, ptr %getelementptr, i64 %mul
717 store ptr %getelementptr71, ptr %alloca51, align 8
718 store ptr null, ptr %alloca52, align 8
719 %load72 = load ptr, ptr %alloca46, align 8
720 %load73 = load i64, ptr %alloca40, align 8
721 %getelementptr74 = getelementptr inbounds float, ptr %load72, i64 %load73
722 %load75 = load i64, ptr %alloca41, align 8
723 %sub76 = sub i64 %load75, 1
724 %load77 = load i64, ptr %alloca47, align 8
725 %mul78 = mul i64 %sub76, %load77
726 %getelementptr79 = getelementptr inbounds float, ptr %getelementptr74, i64 %mul78
727 store ptr %getelementptr79, ptr %alloca52, align 8
728 store ptr null, ptr %alloca53, align 8
729 %load80 = load ptr, ptr %alloca48, align 8
730 %load81 = load i64, ptr %alloca39, align 8
731 %getelementptr82 = getelementptr inbounds float, ptr %load80, i64 %load81
732 %load83 = load i64, ptr %alloca40, align 8
733 %sub84 = sub i64 %load83, 1
734 %load85 = load i64, ptr %alloca49, align 8
735 %mul86 = mul i64 %sub84, %load85
736 %getelementptr87 = getelementptr inbounds float, ptr %getelementptr82, i64 %mul86
737 store ptr %getelementptr87, ptr %alloca53, align 8
738 store i64 32, ptr %alloca54, align 8
739 store i64 32, ptr %alloca55, align 8
740 store i64 0, ptr %alloca56, align 8
741 %load88 = load i64, ptr %alloca41, align 8
742 %mul89 = mul i64 32, %load88
743 store i64 %mul89, ptr %alloca56, align 8
744 %load90 = load i8, ptr %alloca42, align 1
745 %trunc = trunc i8 %load90 to i1
746 store i64 32, ptr %alloca44, align 8
747 store i64 0, ptr %alloca57, align 8
748 %load91 = load i64, ptr %alloca39, align 8
749 %sub92 = sub i64 %load91, 1
750 %udiv = udiv i64 %sub92, 32
751 %add = add i64 %udiv, 1
752 store i64 %add, ptr %alloca57, align 8
753 %load93 = load ptr, ptr %alloca43, align 8
754 %load94 = load i64, ptr %alloca57, align 8
755 %load95 = load i64, ptr %alloca56, align 8
756 %mul96 = mul i64 %load94, %load95
757 %getelementptr97 = getelementptr inbounds float, ptr %load93, i64 %mul96
758 store ptr %getelementptr97, ptr %alloca51, align 8
759 %load98 = load i8, ptr %alloca45, align 1
760 %trunc99 = trunc i8 %load98 to i1
761 store i64 32, ptr %alloca47, align 8
762 store i64 0, ptr %alloca58, align 8
763 %load100 = load i64, ptr %alloca40, align 8
764 %sub101 = sub i64 %load100, 1
765 %udiv102 = udiv i64 %sub101, 32
766 %add103 = add i64 %udiv102, 1
767 store i64 %add103, ptr %alloca58, align 8
768 %load104 = load ptr, ptr %alloca46, align 8
769 %load105 = load i64, ptr %alloca58, align 8
770 %load106 = load i64, ptr %alloca56, align 8
771 %mul107 = mul i64 %load105, %load106
772 %getelementptr108 = getelementptr inbounds float, ptr %load104, i64 %mul107
773 store ptr %getelementptr108, ptr %alloca52, align 8
774 store i64 0, ptr %alloca59, align 8
775 store i64 0, ptr %alloca59, align 8
776 %load109 = load i64, ptr %alloca59, align 8
777 %load110 = load i64, ptr %alloca40, align 8
778 %icmp = icmp ult i64 %load109, %load110
779 store i64 0, ptr %alloca60, align 8
780 store i64 0, ptr %alloca60, align 8
781 %load111 = load i64, ptr %alloca60, align 8
782 %load112 = load i64, ptr %alloca39, align 8
783 %icmp113 = icmp ult i64 %load111, %load112
784 store i64 0, ptr %alloca61, align 8
785 %load114 = load i64, ptr %alloca39, align 8
786 %load115 = load i64, ptr %alloca60, align 8
787 %sub116 = sub i64 %load114, %load115
788 store i64 %sub116, ptr %alloca35, align 8
789 store i64 32, ptr %alloca36, align 8
790 %load117 = load i64, ptr %alloca35, align 8
791 %load118 = load i64, ptr %alloca36, align 8
792 %icmp119 = icmp ult i64 %load117, %load118
793 %load120 = load i64, ptr %alloca35, align 8
794 store i64 %load120, ptr %alloca61, align 8
795 store i64 0, ptr %alloca62, align 8
796 %load121 = load i64, ptr %alloca40, align 8
797 %load122 = load i64, ptr %alloca59, align 8
798 %sub123 = sub i64 %load121, %load122
799 store i64 %sub123, ptr %alloca37, align 8
800 store i64 32, ptr %alloca38, align 8
801 %load124 = load i64, ptr %alloca37, align 8
802 %load125 = load i64, ptr %alloca38, align 8
803 %icmp126 = icmp ult i64 %load124, %load125
804 %load127 = load i64, ptr %alloca37, align 8
805 store i64 %load127, ptr %alloca62, align 8
806 %load128 = load i64, ptr %alloca60, align 8
807 %load129 = load i64, ptr %alloca39, align 8
808 %call = call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c32(i64 %load128, i64 %load129, i32 2)
809 store target("aarch64.svcount") %call, ptr %alloca63, align 2
810 %load130 = load i64, ptr %alloca59, align 8
811 %load131 = load i64, ptr %alloca40, align 8
812 %call132 = call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c32(i64 %load130, i64 %load131, i32 2)
813 store target("aarch64.svcount") %call132, ptr %alloca64, align 2
814 store ptr null, ptr %alloca65, align 8
815 %load133 = load ptr, ptr %alloca48, align 8
816 %load134 = load i64, ptr %alloca60, align 8
817 %getelementptr135 = getelementptr inbounds float, ptr %load133, i64 %load134
818 %load136 = load i64, ptr %alloca59, align 8
819 %load137 = load i64, ptr %alloca49, align 8
820 %mul138 = mul i64 %load136, %load137
821 %getelementptr139 = getelementptr inbounds float, ptr %getelementptr135, i64 %mul138
822 store ptr %getelementptr139, ptr %alloca65, align 8
823 call void @llvm.aarch64.sme.zero(i32 255)
824 store ptr null, ptr %alloca66, align 8
825 %load140 = load i8, ptr %alloca42, align 1
826 %trunc141 = trunc i8 %load140 to i1
827 %load142 = load ptr, ptr %alloca43, align 8
828 %load143 = load i64, ptr %alloca60, align 8
829 %load144 = load i64, ptr %alloca41, align 8
830 %mul145 = mul i64 %load143, %load144
831 %getelementptr146 = getelementptr inbounds float, ptr %load142, i64 %mul145
832 store ptr %getelementptr146, ptr %alloca66, align 8
833 store ptr null, ptr %alloca67, align 8
834 %load147 = load i8, ptr %alloca45, align 1
835 %trunc148 = trunc i8 %load147 to i1
836 %load149 = load ptr, ptr %alloca46, align 8
837 %load150 = load i64, ptr %alloca59, align 8
838 %load151 = load i64, ptr %alloca41, align 8
839 %mul152 = mul i64 %load150, %load151
840 %getelementptr153 = getelementptr inbounds float, ptr %load149, i64 %mul152
841 store ptr %getelementptr153, ptr %alloca67, align 8
842 %load154 = load i64, ptr %alloca41, align 8
843 %load155 = load target("aarch64.svcount"), ptr %alloca63, align 2
844 %load156 = load ptr, ptr %alloca66, align 8
845 %load157 = load i64, ptr %alloca44, align 8
846 %udiv158 = udiv i64 %load157, 4
847 %load159 = load target("aarch64.svcount"), ptr %alloca64, align 2
848 %load160 = load ptr, ptr %alloca67, align 8
849 %load161 = load i64, ptr %alloca47, align 8
850 %udiv162 = udiv i64 %load161, 4
851 store i64 %load154, ptr %alloca24, align 8
852 store target("aarch64.svcount") %load155, ptr %alloca25, align 2
853 store ptr %load156, ptr %alloca26, align 8
854 store i64 %udiv158, ptr %alloca27, align 8
855 store target("aarch64.svcount") %load159, ptr %alloca28, align 2
856 store ptr %load160, ptr %alloca29, align 8
857 store i64 %udiv162, ptr %alloca30, align 8
858 %load163 = load target("aarch64.svcount"), ptr %alloca25, align 2
859 %call164 = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") %load163, i32 0)
860 %extractvalue = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %call164, 0
861 %call165 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %extractvalue)
862 %call166 = call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %call165, i64 0)
863 %extractvalue167 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %call164, 1
864 %call168 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %extractvalue167)
865 %call169 = call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %call166, <vscale x 16 x i1> %call168, i64 16)
866 store <vscale x 32 x i1> %call169, ptr %alloca31, align 2
867 %load170 = load target("aarch64.svcount"), ptr %alloca28, align 2
868 %call171 = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") %load170, i32 0)
869 %extractvalue172 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %call171, 0
870 %call173 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %extractvalue172)
871 %call174 = call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %call173, i64 0)
872 %extractvalue175 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %call171, 1
873 %call176 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %extractvalue175)
874 %call177 = call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %call174, <vscale x 16 x i1> %call176, i64 16)
875 store <vscale x 32 x i1> %call177, ptr %alloca32, align 2
878 bb178: ; preds = %bb178, %bb
879 %load179 = load i64, ptr %alloca24, align 8
880 %icmp180 = icmp ugt i64 %load179, 0
881 %load181 = load target("aarch64.svcount"), ptr %alloca25, align 2
882 %load182 = load ptr, ptr %alloca26, align 8
883 %call183 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") %load181, ptr %load182)
884 %extractvalue184 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %call183, 0
885 %call185 = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> %extractvalue184, i64 0)
886 %extractvalue186 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %call183, 1
887 %call187 = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> %call185, <vscale x 4 x float> %extractvalue186, i64 4)
888 store <vscale x 8 x float> %call187, ptr %alloca33, align 16
889 %load188 = load i64, ptr %alloca27, align 8
890 %load189 = load ptr, ptr %alloca26, align 8
891 %getelementptr190 = getelementptr inbounds float, ptr %load189, i64 %load188
892 store ptr %getelementptr190, ptr %alloca26, align 8
893 %load191 = load target("aarch64.svcount"), ptr %alloca28, align 2
894 %load192 = load ptr, ptr %alloca29, align 8
895 %call193 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") %load191, ptr %load192)
896 %extractvalue194 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %call193, 0
897 %call195 = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> %extractvalue194, i64 0)
898 %extractvalue196 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %call193, 1
899 %call197 = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> %call195, <vscale x 4 x float> %extractvalue196, i64 4)
900 store <vscale x 8 x float> %call197, ptr %alloca34, align 16
901 %load198 = load i64, ptr %alloca30, align 8
902 %load199 = load ptr, ptr %alloca29, align 8
903 %getelementptr200 = getelementptr inbounds float, ptr %load199, i64 %load198
904 store ptr %getelementptr200, ptr %alloca29, align 8
905 %load201 = load <vscale x 32 x i1>, ptr %alloca31, align 2
906 %call202 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load201, i64 0)
907 %load203 = load <vscale x 32 x i1>, ptr %alloca32, align 2
908 %call204 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load203, i64 0)
909 %load205 = load <vscale x 8 x float>, ptr %alloca33, align 16
910 %call206 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load205, i64 0)
911 %load207 = load <vscale x 8 x float>, ptr %alloca34, align 16
912 %call208 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load207, i64 0)
913 store <vscale x 16 x i1> %call202, ptr %alloca12, align 2
914 store <vscale x 16 x i1> %call204, ptr %alloca13, align 2
915 store <vscale x 4 x float> %call206, ptr %alloca14, align 16
916 store <vscale x 4 x float> %call208, ptr %alloca15, align 16
917 %load209 = load <vscale x 16 x i1>, ptr %alloca12, align 2
918 %load210 = load <vscale x 16 x i1>, ptr %alloca13, align 2
919 %load211 = load <vscale x 4 x float>, ptr %alloca14, align 16
920 %load212 = load <vscale x 4 x float>, ptr %alloca15, align 16
921 %call213 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load209)
922 %call214 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load210)
923 call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, <vscale x 4 x i1> %call213, <vscale x 4 x i1> %call214, <vscale x 4 x float> %load211, <vscale x 4 x float> %load212)
924 %load215 = load <vscale x 32 x i1>, ptr %alloca31, align 2
925 %call216 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load215, i64 16)
926 %load217 = load <vscale x 32 x i1>, ptr %alloca32, align 2
927 %call218 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load217, i64 0)
928 %load219 = load <vscale x 8 x float>, ptr %alloca33, align 16
929 %call220 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load219, i64 4)
930 %load221 = load <vscale x 8 x float>, ptr %alloca34, align 16
931 %call222 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load221, i64 0)
932 store <vscale x 16 x i1> %call216, ptr %alloca8, align 2
933 store <vscale x 16 x i1> %call218, ptr %alloca9, align 2
934 store <vscale x 4 x float> %call220, ptr %alloca10, align 16
935 store <vscale x 4 x float> %call222, ptr %alloca11, align 16
936 %load223 = load <vscale x 16 x i1>, ptr %alloca8, align 2
937 %load224 = load <vscale x 16 x i1>, ptr %alloca9, align 2
938 %load225 = load <vscale x 4 x float>, ptr %alloca10, align 16
939 %load226 = load <vscale x 4 x float>, ptr %alloca11, align 16
940 %call227 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load223)
941 %call228 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load224)
942 call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, <vscale x 4 x i1> %call227, <vscale x 4 x i1> %call228, <vscale x 4 x float> %load225, <vscale x 4 x float> %load226)
943 %load229 = load <vscale x 32 x i1>, ptr %alloca31, align 2
944 %call230 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load229, i64 0)
945 %load231 = load <vscale x 32 x i1>, ptr %alloca32, align 2
946 %call232 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load231, i64 16)
947 %load233 = load <vscale x 8 x float>, ptr %alloca33, align 16
948 %call234 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load233, i64 0)
949 %load235 = load <vscale x 8 x float>, ptr %alloca34, align 16
950 %call236 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load235, i64 4)
951 store <vscale x 16 x i1> %call230, ptr %alloca4, align 2
952 store <vscale x 16 x i1> %call232, ptr %alloca5, align 2
953 store <vscale x 4 x float> %call234, ptr %alloca6, align 16
954 store <vscale x 4 x float> %call236, ptr %alloca7, align 16
955 %load237 = load <vscale x 16 x i1>, ptr %alloca4, align 2
956 %load238 = load <vscale x 16 x i1>, ptr %alloca5, align 2
957 %load239 = load <vscale x 4 x float>, ptr %alloca6, align 16
958 %load240 = load <vscale x 4 x float>, ptr %alloca7, align 16
959 %call241 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load237)
960 %call242 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load238)
961 call void @llvm.aarch64.sme.mopa.nxv4f32(i32 2, <vscale x 4 x i1> %call241, <vscale x 4 x i1> %call242, <vscale x 4 x float> %load239, <vscale x 4 x float> %load240)
962 %load243 = load <vscale x 32 x i1>, ptr %alloca31, align 2
963 %call244 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load243, i64 16)
964 %load245 = load <vscale x 32 x i1>, ptr %alloca32, align 2
965 %call246 = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1> %load245, i64 16)
966 %load247 = load <vscale x 8 x float>, ptr %alloca33, align 16
967 %call248 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load247, i64 4)
968 %load249 = load <vscale x 8 x float>, ptr %alloca34, align 16
969 %call250 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> %load249, i64 4)
970 store <vscale x 16 x i1> %call244, ptr %alloca, align 2
971 store <vscale x 16 x i1> %call246, ptr %alloca1, align 2
972 store <vscale x 4 x float> %call248, ptr %alloca2, align 16
973 store <vscale x 4 x float> %call250, ptr %alloca3, align 16
974 %load251 = load <vscale x 16 x i1>, ptr %alloca, align 2
975 %load252 = load <vscale x 16 x i1>, ptr %alloca1, align 2
976 %load253 = load <vscale x 4 x float>, ptr %alloca2, align 16
977 %load254 = load <vscale x 4 x float>, ptr %alloca3, align 16
978 %call255 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load251)
979 %call256 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %load252)
980 call void @llvm.aarch64.sme.mopa.nxv4f32(i32 3, <vscale x 4 x i1> %call255, <vscale x 4 x i1> %call256, <vscale x 4 x float> %load253, <vscale x 4 x float> %load254)
981 %load257 = load i64, ptr %alloca24, align 8
982 %add258 = add i64 %load257, -1
983 store i64 %add258, ptr %alloca24, align 8
987 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
988 declare target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c32(i64, i64, i32 immarg) #2
990 ; Function Attrs: nocallback nofree nosync nounwind willreturn
991 declare void @llvm.aarch64.sme.zero(i32 immarg) #3
993 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
994 declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount"), i32 immarg) #2
996 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
997 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>) #2
999 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
1000 declare <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1>, <vscale x 16 x i1>, i64 immarg) #4
1002 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read)
1003 declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount"), ptr) #5
1005 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
1006 declare <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float>, <vscale x 4 x float>, i64 immarg) #4
1008 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
1009 declare <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv32i1(<vscale x 32 x i1>, i64 immarg) #4
1011 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
1012 declare <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float>, i64 immarg) #4
1014 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
1015 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>) #2
1017 ; Function Attrs: nocallback nofree nosync nounwind willreturn
1018 declare void @llvm.aarch64.sme.mopa.nxv4f32(i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) #3
1020 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
1021 declare target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c8(i64, i64, i32 immarg) #2
1023 ; Function Attrs: nocallback nofree nosync nounwind willreturn
1024 declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32, i32) #3
1026 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
1027 declare <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8>, <vscale x 16 x i8>, i64 immarg) #4
1029 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
1030 declare <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8>, i64 immarg) #4
1032 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
1033 declare void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr) #6
1035 attributes #0 = { cold noreturn nounwind }
1036 attributes #1 = { mustprogress noinline optnone ssp uwtable(sync) vscale_range(1,16) "aarch64_new_za" "aarch64_pstate_sm_enabled" "frame-pointer"="non-leaf" "target-features"="+fp-armv8,+fullfp16,+sme,+sme-f64f64,+sme2" }
1037 attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
1038 attributes #3 = { nocallback nofree nosync nounwind willreturn }
1039 attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
1040 attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
1041 attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }