1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s | FileCheck %s
4 target triple = "aarch64-unknown-unknown-eabi-elf"
6 ; This test verifies that call arguments and results are not coalesced
7 ; with SVE vector registers by the coalescer, such that no 'mul vl'
8 ; ldr/str pairs are generated in the streaming-mode-changing call sequence.
; Scalar i8 argument: %arg is both passed to the (non-streaming) callee and
; inserted into a scalable vector. The generated code must spill z0 to the
; stack before smstop and reload it after smstart, rather than keeping an SVE
; register live across the streaming-mode change.
15 define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 {
16 ; CHECK-LABEL: dont_coalesce_arg_i8:
18 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
20 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
21 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
22 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
23 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
24 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
25 ; CHECK-NEXT: addvl sp, sp, #-1
26 ; CHECK-NEXT: fmov s0, w0
27 ; CHECK-NEXT: mov x19, x1
28 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
29 ; CHECK-NEXT: smstop sm
30 ; CHECK-NEXT: bl use_i8
31 ; CHECK-NEXT: smstart sm
32 ; CHECK-NEXT: ptrue p0.b
33 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
34 ; CHECK-NEXT: st1b { z0.b }, p0, [x19]
35 ; CHECK-NEXT: addvl sp, sp, #1
36 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
37 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
38 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
39 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
40 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
41 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
43 %vec = insertelement <vscale x 16 x i8> poison, i8 %arg, i32 0
44 call void @use_i8(i8 %arg)
45 store <vscale x 16 x i8> %vec, ptr %ptr
; Same as the i8 case for a scalar i16 argument: z0 is spilled/reloaded
; around the smstop/smstart pair instead of being coalesced across the call.
49 define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 {
50 ; CHECK-LABEL: dont_coalesce_arg_i16:
52 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
54 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
55 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
56 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
57 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
58 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
59 ; CHECK-NEXT: addvl sp, sp, #-1
60 ; CHECK-NEXT: fmov s0, w0
61 ; CHECK-NEXT: mov x19, x1
62 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
63 ; CHECK-NEXT: smstop sm
64 ; CHECK-NEXT: bl use_i16
65 ; CHECK-NEXT: smstart sm
66 ; CHECK-NEXT: ptrue p0.h
67 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
68 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
69 ; CHECK-NEXT: addvl sp, sp, #1
70 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
71 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
72 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
73 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
74 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
75 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
77 %vec = insertelement <vscale x 8 x i16> poison, i16 %arg, i32 0
78 call void @use_i16(i16 %arg)
79 store <vscale x 8 x i16> %vec, ptr %ptr
; Scalar i32 variant: z0 (holding the inserted lane) is spilled before smstop
; and reloaded after smstart; the stored result uses st1w with a .s predicate.
83 define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 {
84 ; CHECK-LABEL: dont_coalesce_arg_i32:
86 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
88 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
89 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
90 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
91 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
92 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
93 ; CHECK-NEXT: addvl sp, sp, #-1
94 ; CHECK-NEXT: fmov s0, w0
95 ; CHECK-NEXT: mov x19, x1
96 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
97 ; CHECK-NEXT: smstop sm
98 ; CHECK-NEXT: bl use_i32
99 ; CHECK-NEXT: smstart sm
100 ; CHECK-NEXT: ptrue p0.s
101 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
102 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
103 ; CHECK-NEXT: addvl sp, sp, #1
104 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
105 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
106 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
107 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
108 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
109 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
111 %vec = insertelement <vscale x 4 x i32> poison, i32 %arg, i32 0
112 call void @use_i32(i32 %arg)
113 store <vscale x 4 x i32> %vec, ptr %ptr
; Scalar i64 variant: the argument is moved into d0 (fmov d0, x0), spilled as
; z0 across the smstop/smstart pair, then stored with st1d and a .d predicate.
117 define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 {
118 ; CHECK-LABEL: dont_coalesce_arg_i64:
120 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
121 ; CHECK-NEXT: cntd x9
122 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
123 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
124 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
125 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
126 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
127 ; CHECK-NEXT: addvl sp, sp, #-1
128 ; CHECK-NEXT: fmov d0, x0
129 ; CHECK-NEXT: mov x19, x1
130 ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
131 ; CHECK-NEXT: smstop sm
132 ; CHECK-NEXT: bl use_i64
133 ; CHECK-NEXT: smstart sm
134 ; CHECK-NEXT: ptrue p0.d
135 ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
136 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
137 ; CHECK-NEXT: addvl sp, sp, #1
138 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
139 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
140 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
141 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
142 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
143 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
145 %vec = insertelement <vscale x 2 x i64> poison, i64 %arg, i32 0
146 call void @use_i64(i64 %arg)
147 store <vscale x 2 x i64> %vec, ptr %ptr
; FP half argument: arrives in h0, so two spill slots are used — the full z0
; (scalable slot at sp+16) for the vector use, and a 2-byte h0 slot for
; re-materializing the call argument after smstop. Neither value stays in a
; register across the streaming-mode change.
151 define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 {
152 ; CHECK-LABEL: dont_coalesce_arg_f16:
154 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
155 ; CHECK-NEXT: cntd x9
156 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
157 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
158 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
159 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
160 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
161 ; CHECK-NEXT: sub sp, sp, #16
162 ; CHECK-NEXT: addvl sp, sp, #-1
163 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
164 ; CHECK-NEXT: add x8, sp, #16
165 ; CHECK-NEXT: mov x19, x0
166 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
167 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
168 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
169 ; CHECK-NEXT: smstop sm
170 ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
171 ; CHECK-NEXT: bl use_f16
172 ; CHECK-NEXT: smstart sm
173 ; CHECK-NEXT: ptrue p0.h
174 ; CHECK-NEXT: add x8, sp, #16
175 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
176 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
177 ; CHECK-NEXT: addvl sp, sp, #1
178 ; CHECK-NEXT: add sp, sp, #16
179 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
180 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
181 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
182 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
183 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
184 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
186 %vec = insertelement <vscale x 8 x half> poison, half %arg, i32 0
187 call void @use_f16(half %arg)
188 store <vscale x 8 x half> %vec, ptr %ptr
; FP float variant of the f16 case: z0 is spilled to the scalable slot and s0
; to a 4-byte slot; s0 is reloaded after smstop to feed the call.
192 define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 {
193 ; CHECK-LABEL: dont_coalesce_arg_f32:
195 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
196 ; CHECK-NEXT: cntd x9
197 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
198 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
199 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
200 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
201 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
202 ; CHECK-NEXT: sub sp, sp, #16
203 ; CHECK-NEXT: addvl sp, sp, #-1
204 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
205 ; CHECK-NEXT: add x8, sp, #16
206 ; CHECK-NEXT: mov x19, x0
207 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
208 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
209 ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
210 ; CHECK-NEXT: smstop sm
211 ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
212 ; CHECK-NEXT: bl use_f32
213 ; CHECK-NEXT: smstart sm
214 ; CHECK-NEXT: ptrue p0.s
215 ; CHECK-NEXT: add x8, sp, #16
216 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
217 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
218 ; CHECK-NEXT: addvl sp, sp, #1
219 ; CHECK-NEXT: add sp, sp, #16
220 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
221 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
222 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
223 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
224 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
225 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
227 %vec = insertelement <vscale x 4 x float> poison, float %arg, i32 0
228 call void @use_f32(float %arg)
229 store <vscale x 4 x float> %vec, ptr %ptr
; FP double variant: z0 spilled to the scalable slot, d0 to an 8-byte slot and
; reloaded after smstop for the call; result stored via st1d.
233 define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 {
234 ; CHECK-LABEL: dont_coalesce_arg_f64:
236 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
237 ; CHECK-NEXT: cntd x9
238 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
239 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
240 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
241 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
242 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
243 ; CHECK-NEXT: sub sp, sp, #16
244 ; CHECK-NEXT: addvl sp, sp, #-1
245 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
246 ; CHECK-NEXT: add x8, sp, #16
247 ; CHECK-NEXT: mov x19, x0
248 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
249 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
250 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
251 ; CHECK-NEXT: smstop sm
252 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
253 ; CHECK-NEXT: bl use_f64
254 ; CHECK-NEXT: smstart sm
255 ; CHECK-NEXT: ptrue p0.d
256 ; CHECK-NEXT: add x8, sp, #16
257 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
258 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
259 ; CHECK-NEXT: addvl sp, sp, #1
260 ; CHECK-NEXT: add sp, sp, #16
261 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
262 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
263 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
264 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
265 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
266 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
268 %vec = insertelement <vscale x 2 x double> poison, double %arg, i32 0
269 call void @use_f64(double %arg)
270 store <vscale x 2 x double> %vec, ptr %ptr
276 ; Single-element vector arguments
; <1 x i8> argument: arrives in d0 per the AAPCS64 small-vector convention, so
; the full z0 goes to the scalable slot and d0 to an 8-byte slot reloaded
; after smstop. Element 0 is also inserted into a scalable vector.
279 define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 {
280 ; CHECK-LABEL: dont_coalesce_arg_v1i8:
282 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
283 ; CHECK-NEXT: cntd x9
284 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
285 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
286 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
287 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
288 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
289 ; CHECK-NEXT: sub sp, sp, #16
290 ; CHECK-NEXT: addvl sp, sp, #-1
291 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
292 ; CHECK-NEXT: add x8, sp, #16
293 ; CHECK-NEXT: mov x19, x0
294 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
295 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
296 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
297 ; CHECK-NEXT: smstop sm
298 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
299 ; CHECK-NEXT: bl use_v16i8
300 ; CHECK-NEXT: smstart sm
301 ; CHECK-NEXT: ptrue p0.b
302 ; CHECK-NEXT: add x8, sp, #16
303 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
304 ; CHECK-NEXT: st1b { z0.b }, p0, [x19]
305 ; CHECK-NEXT: addvl sp, sp, #1
306 ; CHECK-NEXT: add sp, sp, #16
307 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
308 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
309 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
310 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
311 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
312 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
314 %elt = extractelement <1 x i8> %arg, i32 0
315 %vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
316 call void @use_v16i8(<1 x i8> %arg)
317 store <vscale x 16 x i8> %vec, ptr %ptr
; <1 x i16> variant of the single-element vector case; same d0/z0 dual-spill
; pattern around the streaming-mode change, stored with st1h.
321 define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 {
322 ; CHECK-LABEL: dont_coalesce_arg_v1i16:
324 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
325 ; CHECK-NEXT: cntd x9
326 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
327 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
328 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
329 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
330 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
331 ; CHECK-NEXT: sub sp, sp, #16
332 ; CHECK-NEXT: addvl sp, sp, #-1
333 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
334 ; CHECK-NEXT: add x8, sp, #16
335 ; CHECK-NEXT: mov x19, x0
336 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
337 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
338 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
339 ; CHECK-NEXT: smstop sm
340 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
341 ; CHECK-NEXT: bl use_v8i16
342 ; CHECK-NEXT: smstart sm
343 ; CHECK-NEXT: ptrue p0.h
344 ; CHECK-NEXT: add x8, sp, #16
345 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
346 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
347 ; CHECK-NEXT: addvl sp, sp, #1
348 ; CHECK-NEXT: add sp, sp, #16
349 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
350 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
351 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
352 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
353 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
354 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
356 %elt = extractelement <1 x i16> %arg, i32 0
357 %vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
358 call void @use_v8i16(<1 x i16> %arg)
359 store <vscale x 8 x i16> %vec, ptr %ptr
; <1 x i32> variant; same d0/z0 dual-spill pattern, stored with st1w.
363 define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 {
364 ; CHECK-LABEL: dont_coalesce_arg_v1i32:
366 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
367 ; CHECK-NEXT: cntd x9
368 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
369 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
370 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
371 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
372 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
373 ; CHECK-NEXT: sub sp, sp, #16
374 ; CHECK-NEXT: addvl sp, sp, #-1
375 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
376 ; CHECK-NEXT: add x8, sp, #16
377 ; CHECK-NEXT: mov x19, x0
378 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
379 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
380 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
381 ; CHECK-NEXT: smstop sm
382 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
383 ; CHECK-NEXT: bl use_v4i32
384 ; CHECK-NEXT: smstart sm
385 ; CHECK-NEXT: ptrue p0.s
386 ; CHECK-NEXT: add x8, sp, #16
387 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
388 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
389 ; CHECK-NEXT: addvl sp, sp, #1
390 ; CHECK-NEXT: add sp, sp, #16
391 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
392 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
393 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
394 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
395 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
396 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
398 %elt = extractelement <1 x i32> %arg, i32 0
399 %vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
400 call void @use_v4i32(<1 x i32> %arg)
401 store <vscale x 4 x i32> %vec, ptr %ptr
; <1 x i64> variant; same d0/z0 dual-spill pattern, stored with st1d.
405 define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 {
406 ; CHECK-LABEL: dont_coalesce_arg_v1i64:
408 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
409 ; CHECK-NEXT: cntd x9
410 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
411 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
412 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
413 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
414 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
415 ; CHECK-NEXT: sub sp, sp, #16
416 ; CHECK-NEXT: addvl sp, sp, #-1
417 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
418 ; CHECK-NEXT: add x8, sp, #16
419 ; CHECK-NEXT: mov x19, x0
420 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
421 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
422 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
423 ; CHECK-NEXT: smstop sm
424 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
425 ; CHECK-NEXT: bl use_v2i64
426 ; CHECK-NEXT: smstart sm
427 ; CHECK-NEXT: ptrue p0.d
428 ; CHECK-NEXT: add x8, sp, #16
429 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
430 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
431 ; CHECK-NEXT: addvl sp, sp, #1
432 ; CHECK-NEXT: add sp, sp, #16
433 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
434 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
435 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
436 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
437 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
438 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
440 %elt = extractelement <1 x i64> %arg, i32 0
441 %vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
442 call void @use_v2i64(<1 x i64> %arg)
443 store <vscale x 2 x i64> %vec, ptr %ptr
; <1 x half> variant: arrives in h0 (2-byte spill slot), z0 to the scalable
; slot; both reloaded on their respective sides of the mode change.
447 define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 {
448 ; CHECK-LABEL: dont_coalesce_arg_v1f16:
450 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
451 ; CHECK-NEXT: cntd x9
452 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
453 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
454 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
455 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
456 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
457 ; CHECK-NEXT: sub sp, sp, #16
458 ; CHECK-NEXT: addvl sp, sp, #-1
459 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
460 ; CHECK-NEXT: add x8, sp, #16
461 ; CHECK-NEXT: mov x19, x0
462 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
463 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
464 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
465 ; CHECK-NEXT: smstop sm
466 ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
467 ; CHECK-NEXT: bl use_v8f16
468 ; CHECK-NEXT: smstart sm
469 ; CHECK-NEXT: ptrue p0.h
470 ; CHECK-NEXT: add x8, sp, #16
471 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
472 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
473 ; CHECK-NEXT: addvl sp, sp, #1
474 ; CHECK-NEXT: add sp, sp, #16
475 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
476 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
477 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
478 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
479 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
480 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
482 %elt = extractelement <1 x half> %arg, i32 0
483 %vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
484 call void @use_v8f16(<1 x half> %arg)
485 store <vscale x 8 x half> %vec, ptr %ptr
; <1 x float> variant; d0/z0 dual-spill pattern, result stored with st1w.
489 define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 {
490 ; CHECK-LABEL: dont_coalesce_arg_v1f32:
492 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
493 ; CHECK-NEXT: cntd x9
494 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
495 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
496 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
497 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
498 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
499 ; CHECK-NEXT: sub sp, sp, #16
500 ; CHECK-NEXT: addvl sp, sp, #-1
501 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
502 ; CHECK-NEXT: add x8, sp, #16
503 ; CHECK-NEXT: mov x19, x0
504 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
505 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
506 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
507 ; CHECK-NEXT: smstop sm
508 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
509 ; CHECK-NEXT: bl use_v4f32
510 ; CHECK-NEXT: smstart sm
511 ; CHECK-NEXT: ptrue p0.s
512 ; CHECK-NEXT: add x8, sp, #16
513 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
514 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
515 ; CHECK-NEXT: addvl sp, sp, #1
516 ; CHECK-NEXT: add sp, sp, #16
517 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
518 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
519 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
520 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
521 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
522 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
524 %elt = extractelement <1 x float> %arg, i32 0
525 %vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
526 call void @use_v4f32(<1 x float> %arg)
527 store <vscale x 4 x float> %vec, ptr %ptr
; <1 x double> variant; d0/z0 dual-spill pattern, result stored with st1d.
531 define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 {
532 ; CHECK-LABEL: dont_coalesce_arg_v1f64:
534 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
535 ; CHECK-NEXT: cntd x9
536 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
537 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
538 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
539 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
540 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
541 ; CHECK-NEXT: sub sp, sp, #16
542 ; CHECK-NEXT: addvl sp, sp, #-1
543 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
544 ; CHECK-NEXT: add x8, sp, #16
545 ; CHECK-NEXT: mov x19, x0
546 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
547 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
548 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
549 ; CHECK-NEXT: smstop sm
550 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
551 ; CHECK-NEXT: bl use_v2f64
552 ; CHECK-NEXT: smstart sm
553 ; CHECK-NEXT: ptrue p0.d
554 ; CHECK-NEXT: add x8, sp, #16
555 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
556 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
557 ; CHECK-NEXT: addvl sp, sp, #1
558 ; CHECK-NEXT: add sp, sp, #16
559 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
560 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
561 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
562 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
563 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
564 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
566 %elt = extractelement <1 x double> %arg, i32 0
567 %vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
568 call void @use_v2f64(<1 x double> %arg)
569 store <vscale x 2 x double> %vec, ptr %ptr
574 ; Full vector arguments
; Full 128-bit <16 x i8> argument: q0 gets a 16-byte slot for the call and z0
; a scalable slot for the llvm.vector.insert result; neither is kept in a
; register across the smstop/smstart pair.
577 define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 {
578 ; CHECK-LABEL: dont_coalesce_arg_v16i8:
580 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
581 ; CHECK-NEXT: cntd x9
582 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
583 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
584 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
585 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
586 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
587 ; CHECK-NEXT: sub sp, sp, #16
588 ; CHECK-NEXT: addvl sp, sp, #-1
589 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
590 ; CHECK-NEXT: add x8, sp, #16
591 ; CHECK-NEXT: mov x19, x0
592 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
593 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
594 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
595 ; CHECK-NEXT: smstop sm
596 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
597 ; CHECK-NEXT: bl use_v16i8
598 ; CHECK-NEXT: smstart sm
599 ; CHECK-NEXT: ptrue p0.b
600 ; CHECK-NEXT: add x8, sp, #16
601 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
602 ; CHECK-NEXT: st1b { z0.b }, p0, [x19]
603 ; CHECK-NEXT: addvl sp, sp, #1
604 ; CHECK-NEXT: add sp, sp, #16
605 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
606 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
607 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
608 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
609 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
610 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
612 %vec = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> %arg, i64 0)
613 call void @use_v16i8(<16 x i8> %arg)
614 store <vscale x 16 x i8> %vec, ptr %ptr
; Full-vector <8 x i16> variant; q0/z0 dual-spill pattern, stored with st1h.
618 define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 {
619 ; CHECK-LABEL: dont_coalesce_arg_v8i16:
621 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
622 ; CHECK-NEXT: cntd x9
623 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
624 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
625 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
626 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
627 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
628 ; CHECK-NEXT: sub sp, sp, #16
629 ; CHECK-NEXT: addvl sp, sp, #-1
630 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
631 ; CHECK-NEXT: add x8, sp, #16
632 ; CHECK-NEXT: mov x19, x0
633 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
634 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
635 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
636 ; CHECK-NEXT: smstop sm
637 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
638 ; CHECK-NEXT: bl use_v8i16
639 ; CHECK-NEXT: smstart sm
640 ; CHECK-NEXT: ptrue p0.h
641 ; CHECK-NEXT: add x8, sp, #16
642 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
643 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
644 ; CHECK-NEXT: addvl sp, sp, #1
645 ; CHECK-NEXT: add sp, sp, #16
646 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
647 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
648 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
649 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
650 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
651 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
653 %vec = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> %arg, i64 0)
654 call void @use_v8i16(<8 x i16> %arg)
655 store <vscale x 8 x i16> %vec, ptr %ptr
; Full-vector <4 x i32> variant; q0/z0 dual-spill pattern, stored with st1w.
659 define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 {
660 ; CHECK-LABEL: dont_coalesce_arg_v4i32:
662 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
663 ; CHECK-NEXT: cntd x9
664 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
665 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
666 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
667 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
668 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
669 ; CHECK-NEXT: sub sp, sp, #16
670 ; CHECK-NEXT: addvl sp, sp, #-1
671 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
672 ; CHECK-NEXT: add x8, sp, #16
673 ; CHECK-NEXT: mov x19, x0
674 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
675 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
676 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
677 ; CHECK-NEXT: smstop sm
678 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
679 ; CHECK-NEXT: bl use_v4i32
680 ; CHECK-NEXT: smstart sm
681 ; CHECK-NEXT: ptrue p0.s
682 ; CHECK-NEXT: add x8, sp, #16
683 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
684 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
685 ; CHECK-NEXT: addvl sp, sp, #1
686 ; CHECK-NEXT: add sp, sp, #16
687 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
688 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
689 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
690 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
691 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
692 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
694 %vec = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> %arg, i64 0)
695 call void @use_v4i32(<4 x i32> %arg)
696 store <vscale x 4 x i32> %vec, ptr %ptr
; Full-vector <2 x i64> variant; q0/z0 dual-spill pattern, stored with st1d.
700 define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 {
701 ; CHECK-LABEL: dont_coalesce_arg_v2i64:
703 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
704 ; CHECK-NEXT: cntd x9
705 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
706 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
707 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
708 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
709 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
710 ; CHECK-NEXT: sub sp, sp, #16
711 ; CHECK-NEXT: addvl sp, sp, #-1
712 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
713 ; CHECK-NEXT: add x8, sp, #16
714 ; CHECK-NEXT: mov x19, x0
715 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
716 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
717 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
718 ; CHECK-NEXT: smstop sm
719 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
720 ; CHECK-NEXT: bl use_v2i64
721 ; CHECK-NEXT: smstart sm
722 ; CHECK-NEXT: ptrue p0.d
723 ; CHECK-NEXT: add x8, sp, #16
724 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
725 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
726 ; CHECK-NEXT: addvl sp, sp, #1
727 ; CHECK-NEXT: add sp, sp, #16
728 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
729 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
730 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
731 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
732 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
733 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
735 %vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %arg, i64 0)
736 call void @use_v2i64(<2 x i64> %arg)
737 store <vscale x 2 x i64> %vec, ptr %ptr
741 define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 {
742 ; CHECK-LABEL: dont_coalesce_arg_v8f16:
744 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
745 ; CHECK-NEXT: cntd x9
746 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
747 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
748 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
749 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
750 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
751 ; CHECK-NEXT: sub sp, sp, #16
752 ; CHECK-NEXT: addvl sp, sp, #-1
753 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
754 ; CHECK-NEXT: add x8, sp, #16
755 ; CHECK-NEXT: mov x19, x0
756 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
757 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
758 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
759 ; CHECK-NEXT: smstop sm
760 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
761 ; CHECK-NEXT: bl use_v8f16
762 ; CHECK-NEXT: smstart sm
763 ; CHECK-NEXT: ptrue p0.h
764 ; CHECK-NEXT: add x8, sp, #16
765 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
766 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
767 ; CHECK-NEXT: addvl sp, sp, #1
768 ; CHECK-NEXT: add sp, sp, #16
769 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
770 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
771 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
772 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
773 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
774 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
776 %vec = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> %arg, i64 0)
777 call void @use_v8f16(<8 x half> %arg)
778 store <vscale x 8 x half> %vec, ptr %ptr
782 define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 {
783 ; CHECK-LABEL: dont_coalesce_arg_v8bf16:
785 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
786 ; CHECK-NEXT: cntd x9
787 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
788 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
789 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
790 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
791 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
792 ; CHECK-NEXT: sub sp, sp, #16
793 ; CHECK-NEXT: addvl sp, sp, #-1
794 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
795 ; CHECK-NEXT: add x8, sp, #16
796 ; CHECK-NEXT: mov x19, x0
797 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
798 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
799 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
800 ; CHECK-NEXT: smstop sm
801 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
802 ; CHECK-NEXT: bl use_v8bf16
803 ; CHECK-NEXT: smstart sm
804 ; CHECK-NEXT: ptrue p0.h
805 ; CHECK-NEXT: add x8, sp, #16
806 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
807 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
808 ; CHECK-NEXT: addvl sp, sp, #1
809 ; CHECK-NEXT: add sp, sp, #16
810 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
811 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
812 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
813 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
814 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
815 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
817 %vec = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> %arg, i64 0)
818 call void @use_v8bf16(<8 x bfloat> %arg)
819 store <vscale x 8 x bfloat> %vec, ptr %ptr
823 define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 {
824 ; CHECK-LABEL: dont_coalesce_arg_v4f32:
826 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
827 ; CHECK-NEXT: cntd x9
828 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
829 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
830 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
831 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
832 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
833 ; CHECK-NEXT: sub sp, sp, #16
834 ; CHECK-NEXT: addvl sp, sp, #-1
835 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
836 ; CHECK-NEXT: add x8, sp, #16
837 ; CHECK-NEXT: mov x19, x0
838 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
839 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
840 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
841 ; CHECK-NEXT: smstop sm
842 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
843 ; CHECK-NEXT: bl use_v4f32
844 ; CHECK-NEXT: smstart sm
845 ; CHECK-NEXT: ptrue p0.d
846 ; CHECK-NEXT: add x8, sp, #16
847 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
848 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
849 ; CHECK-NEXT: addvl sp, sp, #1
850 ; CHECK-NEXT: add sp, sp, #16
851 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
852 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
853 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
854 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
855 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
856 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
858 %vec = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %arg, i64 0)
859 call void @use_v4f32(<4 x float> %arg)
860 store <vscale x 4 x float> %vec, ptr %ptr
864 define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 {
865 ; CHECK-LABEL: dont_coalesce_arg_v2f64:
867 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
868 ; CHECK-NEXT: cntd x9
869 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
870 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
871 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
872 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
873 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
874 ; CHECK-NEXT: sub sp, sp, #16
875 ; CHECK-NEXT: addvl sp, sp, #-1
876 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
877 ; CHECK-NEXT: add x8, sp, #16
878 ; CHECK-NEXT: mov x19, x0
879 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
880 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
881 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
882 ; CHECK-NEXT: smstop sm
883 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
884 ; CHECK-NEXT: bl use_v2f64
885 ; CHECK-NEXT: smstart sm
886 ; CHECK-NEXT: ptrue p0.d
887 ; CHECK-NEXT: add x8, sp, #16
888 ; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
889 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
890 ; CHECK-NEXT: addvl sp, sp, #1
891 ; CHECK-NEXT: add sp, sp, #16
892 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
893 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
894 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
895 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
896 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
897 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
899 %vec = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> %arg, i64 0)
900 call void @use_v2f64(<2 x double> %arg)
901 store <vscale x 2 x double> %vec, ptr %ptr
906 ; <8 x i1> type will need type promotion.
908 define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
909 ; CHECK-LABEL: dont_coalesce_arg_v8i1:
911 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
912 ; CHECK-NEXT: cntd x9
913 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
914 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
915 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
916 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
917 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
918 ; CHECK-NEXT: sub sp, sp, #16
919 ; CHECK-NEXT: addvl sp, sp, #-1
920 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
921 ; CHECK-NEXT: mov z1.d, z0.d
922 ; CHECK-NEXT: ptrue p0.b
923 ; CHECK-NEXT: add x8, sp, #16
924 ; CHECK-NEXT: mov x19, x0
925 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
926 ; CHECK-NEXT: and z1.b, z1.b, #0x1
927 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
928 ; CHECK-NEXT: str p0, [x8, #7, mul vl] // 2-byte Folded Spill
929 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
930 ; CHECK-NEXT: smstop sm
931 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
932 ; CHECK-NEXT: bl use_v8i1
933 ; CHECK-NEXT: smstart sm
934 ; CHECK-NEXT: add x8, sp, #16
935 ; CHECK-NEXT: ldr p0, [x8, #7, mul vl] // 2-byte Folded Reload
936 ; CHECK-NEXT: str p0, [x19]
937 ; CHECK-NEXT: addvl sp, sp, #1
938 ; CHECK-NEXT: add sp, sp, #16
939 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
940 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
941 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
942 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
943 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
944 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
946 %vec = call <vscale x 16 x i1> @llvm.vector.insert.nxv8i1.v8i1(<vscale x 16 x i1> poison, <8 x i1> %arg, i64 0)
947 call void @use_v8i1(<8 x i1> %arg)
948 store <vscale x 16 x i1> %vec, ptr %ptr
953 ; Scalar return values
956 define void @dont_coalesce_res_i8(ptr %ptr) #0 {
957 ; CHECK-LABEL: dont_coalesce_res_i8:
959 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
960 ; CHECK-NEXT: cntd x9
961 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
962 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
963 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
964 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
965 ; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
966 ; CHECK-NEXT: mov x19, x0
967 ; CHECK-NEXT: smstop sm
968 ; CHECK-NEXT: bl get_i8
969 ; CHECK-NEXT: smstart sm
970 ; CHECK-NEXT: ptrue p0.b
971 ; CHECK-NEXT: fmov s0, w0
972 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
973 ; CHECK-NEXT: st1b { z0.b }, p0, [x19]
974 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
975 ; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
976 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
977 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
978 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
980 %res = call i8 @get_i8()
981 %vec = insertelement <vscale x 16 x i8> poison, i8 %res, i32 0
982 store <vscale x 16 x i8> %vec, ptr %ptr
986 define void @dont_coalesce_res_i16(ptr %ptr) #0 {
987 ; CHECK-LABEL: dont_coalesce_res_i16:
989 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
990 ; CHECK-NEXT: cntd x9
991 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
992 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
993 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
994 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
995 ; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
996 ; CHECK-NEXT: mov x19, x0
997 ; CHECK-NEXT: smstop sm
998 ; CHECK-NEXT: bl get_i16
999 ; CHECK-NEXT: smstart sm
1000 ; CHECK-NEXT: ptrue p0.h
1001 ; CHECK-NEXT: fmov s0, w0
1002 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1003 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
1004 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1005 ; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
1006 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
1007 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1008 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
1010 %res = call i16 @get_i16()
1011 %vec = insertelement <vscale x 8 x i16> poison, i16 %res, i32 0
1012 store <vscale x 8 x i16> %vec, ptr %ptr
1016 define void @dont_coalesce_res_i32(ptr %ptr) #0 {
1017 ; CHECK-LABEL: dont_coalesce_res_i32:
1019 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
1020 ; CHECK-NEXT: cntd x9
1021 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
1022 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
1023 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
1024 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
1025 ; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
1026 ; CHECK-NEXT: mov x19, x0
1027 ; CHECK-NEXT: smstop sm
1028 ; CHECK-NEXT: bl get_i32
1029 ; CHECK-NEXT: smstart sm
1030 ; CHECK-NEXT: ptrue p0.s
1031 ; CHECK-NEXT: fmov s0, w0
1032 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1033 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
1034 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1035 ; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
1036 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
1037 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1038 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
1040 %res = call i32 @get_i32()
1041 %vec = insertelement <vscale x 4 x i32> poison, i32 %res, i32 0
1042 store <vscale x 4 x i32> %vec, ptr %ptr
1046 define void @dont_coalesce_res_i64(ptr %ptr) #0 {
1047 ; CHECK-LABEL: dont_coalesce_res_i64:
1049 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
1050 ; CHECK-NEXT: cntd x9
1051 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
1052 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
1053 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
1054 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
1055 ; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
1056 ; CHECK-NEXT: mov x19, x0
1057 ; CHECK-NEXT: smstop sm
1058 ; CHECK-NEXT: bl get_i64
1059 ; CHECK-NEXT: smstart sm
1060 ; CHECK-NEXT: ptrue p0.d
1061 ; CHECK-NEXT: fmov d0, x0
1062 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1063 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
1064 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1065 ; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
1066 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
1067 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1068 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
1070 %res = call i64 @get_i64()
1071 %vec = insertelement <vscale x 2 x i64> poison, i64 %res, i32 0
1072 store <vscale x 2 x i64> %vec, ptr %ptr
1076 define void @dont_coalesce_res_f16(ptr %ptr) #0 {
1077 ; CHECK-LABEL: dont_coalesce_res_f16:
1079 ; CHECK-NEXT: sub sp, sp, #112
1080 ; CHECK-NEXT: cntd x9
1081 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1082 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1083 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1084 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1085 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1086 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1087 ; CHECK-NEXT: mov x19, x0
1088 ; CHECK-NEXT: smstop sm
1089 ; CHECK-NEXT: bl get_f16
1090 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
1091 ; CHECK-NEXT: smstart sm
1092 ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
1093 ; CHECK-NEXT: ptrue p0.h
1094 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
1095 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1096 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
1097 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1098 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1099 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1100 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1101 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1102 ; CHECK-NEXT: add sp, sp, #112
1104 %res = call half @get_f16()
1105 %vec = insertelement <vscale x 8 x half> poison, half %res, i32 0
1106 store <vscale x 8 x half> %vec, ptr %ptr
1110 define void @dont_coalesce_res_f32(ptr %ptr) #0 {
1111 ; CHECK-LABEL: dont_coalesce_res_f32:
1113 ; CHECK-NEXT: sub sp, sp, #112
1114 ; CHECK-NEXT: cntd x9
1115 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1116 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1117 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1118 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1119 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1120 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1121 ; CHECK-NEXT: mov x19, x0
1122 ; CHECK-NEXT: smstop sm
1123 ; CHECK-NEXT: bl get_f32
1124 ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
1125 ; CHECK-NEXT: smstart sm
1126 ; CHECK-NEXT: ptrue p0.s
1127 ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
1128 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1129 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
1130 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1131 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1132 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1133 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1134 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1135 ; CHECK-NEXT: add sp, sp, #112
1137 %res = call float @get_f32()
1138 %vec = insertelement <vscale x 4 x float> poison, float %res, i32 0
1139 store <vscale x 4 x float> %vec, ptr %ptr
1143 define void @dont_coalesce_res_f64(ptr %ptr) #0 {
1144 ; CHECK-LABEL: dont_coalesce_res_f64:
1146 ; CHECK-NEXT: sub sp, sp, #112
1147 ; CHECK-NEXT: cntd x9
1148 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1149 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1150 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1151 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1152 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1153 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1154 ; CHECK-NEXT: mov x19, x0
1155 ; CHECK-NEXT: smstop sm
1156 ; CHECK-NEXT: bl get_f64
1157 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
1158 ; CHECK-NEXT: smstart sm
1159 ; CHECK-NEXT: ptrue p0.d
1160 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1161 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1162 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
1163 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1164 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1165 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1166 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1167 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1168 ; CHECK-NEXT: add sp, sp, #112
1170 %res = call double @get_f64()
1171 %vec = insertelement <vscale x 2 x double> poison, double %res, i32 0
1172 store <vscale x 2 x double> %vec, ptr %ptr
1177 ; Single-element vector result values
1180 define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
1181 ; CHECK-LABEL: dont_coalesce_res_v1i8:
1183 ; CHECK-NEXT: sub sp, sp, #112
1184 ; CHECK-NEXT: cntd x9
1185 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1186 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1187 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1188 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1189 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1190 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1191 ; CHECK-NEXT: mov x19, x0
1192 ; CHECK-NEXT: smstop sm
1193 ; CHECK-NEXT: bl get_v1i8
1194 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
1195 ; CHECK-NEXT: smstart sm
1196 ; CHECK-NEXT: ptrue p0.b
1197 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1198 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1199 ; CHECK-NEXT: st1b { z0.b }, p0, [x19]
1200 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1201 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1202 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1203 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1204 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1205 ; CHECK-NEXT: add sp, sp, #112
1207 %res = call <1 x i8> @get_v1i8()
1208 %elt = extractelement <1 x i8> %res, i32 0
1209 %vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
1210 store <vscale x 16 x i8> %vec, ptr %ptr
1214 define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
1215 ; CHECK-LABEL: dont_coalesce_res_v1i16:
1217 ; CHECK-NEXT: sub sp, sp, #112
1218 ; CHECK-NEXT: cntd x9
1219 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1220 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1221 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1222 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1223 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1224 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1225 ; CHECK-NEXT: mov x19, x0
1226 ; CHECK-NEXT: smstop sm
1227 ; CHECK-NEXT: bl get_v1i16
1228 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
1229 ; CHECK-NEXT: smstart sm
1230 ; CHECK-NEXT: ptrue p0.h
1231 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1232 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1233 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
1234 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1235 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1236 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1237 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1238 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1239 ; CHECK-NEXT: add sp, sp, #112
1241 %res = call <1 x i16> @get_v1i16()
1242 %elt = extractelement <1 x i16> %res, i32 0
1243 %vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
1244 store <vscale x 8 x i16> %vec, ptr %ptr
1248 define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
1249 ; CHECK-LABEL: dont_coalesce_res_v1i32:
1251 ; CHECK-NEXT: sub sp, sp, #112
1252 ; CHECK-NEXT: cntd x9
1253 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1254 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1255 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1256 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1257 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1258 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1259 ; CHECK-NEXT: mov x19, x0
1260 ; CHECK-NEXT: smstop sm
1261 ; CHECK-NEXT: bl get_v1i32
1262 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
1263 ; CHECK-NEXT: smstart sm
1264 ; CHECK-NEXT: ptrue p0.s
1265 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1266 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1267 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
1268 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1269 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1270 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1271 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1272 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1273 ; CHECK-NEXT: add sp, sp, #112
1275 %res = call <1 x i32> @get_v1i32()
1276 %elt = extractelement <1 x i32> %res, i32 0
1277 %vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
1278 store <vscale x 4 x i32> %vec, ptr %ptr
1282 define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
1283 ; CHECK-LABEL: dont_coalesce_res_v1i64:
1285 ; CHECK-NEXT: sub sp, sp, #112
1286 ; CHECK-NEXT: cntd x9
1287 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1288 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1289 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1290 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1291 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1292 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1293 ; CHECK-NEXT: mov x19, x0
1294 ; CHECK-NEXT: smstop sm
1295 ; CHECK-NEXT: bl get_v1i64
1296 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
1297 ; CHECK-NEXT: smstart sm
1298 ; CHECK-NEXT: ptrue p0.d
1299 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1300 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1301 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
1302 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1303 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1304 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1305 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1306 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1307 ; CHECK-NEXT: add sp, sp, #112
1309 %res = call <1 x i64> @get_v1i64()
1310 %elt = extractelement <1 x i64> %res, i32 0
1311 %vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
1312 store <vscale x 2 x i64> %vec, ptr %ptr
1316 define void @dont_coalesce_res_v1f16(ptr %ptr) #0 {
1317 ; CHECK-LABEL: dont_coalesce_res_v1f16:
1319 ; CHECK-NEXT: sub sp, sp, #112
1320 ; CHECK-NEXT: cntd x9
1321 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1322 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1323 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1324 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1325 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1326 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1327 ; CHECK-NEXT: mov x19, x0
1328 ; CHECK-NEXT: smstop sm
1329 ; CHECK-NEXT: bl get_v1f16
1330 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
1331 ; CHECK-NEXT: smstart sm
1332 ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
1333 ; CHECK-NEXT: ptrue p0.h
1334 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
1335 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1336 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
1337 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1338 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1339 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1340 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1341 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1342 ; CHECK-NEXT: add sp, sp, #112
1344 %res = call <1 x half> @get_v1f16()
1345 %elt = extractelement <1 x half> %res, i32 0
1346 %vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
1347 store <vscale x 8 x half> %vec, ptr %ptr
1351 define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
1352 ; CHECK-LABEL: dont_coalesce_res_v1f32:
1354 ; CHECK-NEXT: sub sp, sp, #112
1355 ; CHECK-NEXT: cntd x9
1356 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1357 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1358 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1359 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1360 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1361 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1362 ; CHECK-NEXT: mov x19, x0
1363 ; CHECK-NEXT: smstop sm
1364 ; CHECK-NEXT: bl get_v1f32
1365 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
1366 ; CHECK-NEXT: smstart sm
1367 ; CHECK-NEXT: ptrue p0.s
1368 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1369 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1370 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
1371 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1372 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1373 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1374 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1375 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1376 ; CHECK-NEXT: add sp, sp, #112
1378 %res = call <1 x float> @get_v1f32()
1379 %elt = extractelement <1 x float> %res, i32 0
1380 %vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
1381 store <vscale x 4 x float> %vec, ptr %ptr
1385 define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
1386 ; CHECK-LABEL: dont_coalesce_res_v1f64:
1388 ; CHECK-NEXT: sub sp, sp, #112
1389 ; CHECK-NEXT: cntd x9
1390 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1391 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1392 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1393 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1394 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1395 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1396 ; CHECK-NEXT: mov x19, x0
1397 ; CHECK-NEXT: smstop sm
1398 ; CHECK-NEXT: bl get_v1f64
1399 ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
1400 ; CHECK-NEXT: smstart sm
1401 ; CHECK-NEXT: ptrue p0.d
1402 ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
1403 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1404 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
1405 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1406 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1407 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1408 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1409 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1410 ; CHECK-NEXT: add sp, sp, #112
1412 %res = call <1 x double> @get_v1f64()
1413 %elt = extractelement <1 x double> %res, i32 0
1414 %vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
1415 store <vscale x 2 x double> %vec, ptr %ptr
1420 ; Full vector result values
1423 define void @dont_coalesce_res_v16i8(ptr %ptr) #0 {
1424 ; CHECK-LABEL: dont_coalesce_res_v16i8:
1426 ; CHECK-NEXT: sub sp, sp, #112
1427 ; CHECK-NEXT: cntd x9
1428 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1429 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1430 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1431 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1432 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1433 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1434 ; CHECK-NEXT: mov x19, x0
1435 ; CHECK-NEXT: smstop sm
1436 ; CHECK-NEXT: bl get_v16i8
1437 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
1438 ; CHECK-NEXT: smstart sm
1439 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1440 ; CHECK-NEXT: ptrue p0.b
1441 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1442 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1443 ; CHECK-NEXT: st1b { z0.b }, p0, [x19]
1444 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1445 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1446 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1447 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1448 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1449 ; CHECK-NEXT: add sp, sp, #112
1451 %res = call <16 x i8> @get_v16i8()
1452 %vec = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> %res, i64 0)
1453 store <vscale x 16 x i8> %vec, ptr %ptr
; The <8 x i16> result of the non-streaming call @get_v8i16 must stay in a
; 128-bit q-register across the smstop/smstart pair instead of being coalesced
; into an SVE Z register: the spill/fill is a plain "str q0"/"ldr q0" with no
; 'mul vl' addressing.
1457 define void @dont_coalesce_res_v8i16(ptr %ptr) #0 {
1458 ; CHECK-LABEL: dont_coalesce_res_v8i16:
1460 ; CHECK-NEXT: sub sp, sp, #112
1461 ; CHECK-NEXT: cntd x9
1462 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1463 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1464 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1465 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1466 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1467 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1468 ; CHECK-NEXT: mov x19, x0
1469 ; CHECK-NEXT: smstop sm
1470 ; CHECK-NEXT: bl get_v8i16
1471 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
1472 ; CHECK-NEXT: smstart sm
1473 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1474 ; CHECK-NEXT: ptrue p0.h
1475 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1476 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1477 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
1478 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1479 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1480 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1481 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1482 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1483 ; CHECK-NEXT: add sp, sp, #112
; Insert the fixed-length call result into a scalable vector, then store it.
1485 %res = call <8 x i16> @get_v8i16()
1486 %vec = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> %res, i64 0)
1487 store <vscale x 8 x i16> %vec, ptr %ptr
; The <4 x i32> result of the non-streaming call @get_v4i32 must stay in a
; 128-bit q-register across the smstop/smstart pair instead of being coalesced
; into an SVE Z register: the spill/fill is a plain "str q0"/"ldr q0" with no
; 'mul vl' addressing.
1491 define void @dont_coalesce_res_v4i32(ptr %ptr) #0 {
1492 ; CHECK-LABEL: dont_coalesce_res_v4i32:
1494 ; CHECK-NEXT: sub sp, sp, #112
1495 ; CHECK-NEXT: cntd x9
1496 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1497 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1498 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1499 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1500 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1501 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1502 ; CHECK-NEXT: mov x19, x0
1503 ; CHECK-NEXT: smstop sm
1504 ; CHECK-NEXT: bl get_v4i32
1505 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
1506 ; CHECK-NEXT: smstart sm
1507 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1508 ; CHECK-NEXT: ptrue p0.s
1509 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1510 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1511 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
1512 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1513 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1514 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1515 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1516 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1517 ; CHECK-NEXT: add sp, sp, #112
; Insert the fixed-length call result into a scalable vector, then store it.
1519 %res = call <4 x i32> @get_v4i32()
1520 %vec = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> %res, i64 0)
1521 store <vscale x 4 x i32> %vec, ptr %ptr
; The <2 x i64> result of the non-streaming call @get_v2i64 must stay in a
; 128-bit q-register across the smstop/smstart pair instead of being coalesced
; into an SVE Z register: the spill/fill is a plain "str q0"/"ldr q0" with no
; 'mul vl' addressing.
1525 define void @dont_coalesce_res_v2i64(ptr %ptr) #0 {
1526 ; CHECK-LABEL: dont_coalesce_res_v2i64:
1528 ; CHECK-NEXT: sub sp, sp, #112
1529 ; CHECK-NEXT: cntd x9
1530 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1531 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1532 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1533 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1534 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1535 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1536 ; CHECK-NEXT: mov x19, x0
1537 ; CHECK-NEXT: smstop sm
1538 ; CHECK-NEXT: bl get_v2i64
1539 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
1540 ; CHECK-NEXT: smstart sm
1541 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1542 ; CHECK-NEXT: ptrue p0.d
1543 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1544 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1545 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
1546 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1547 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1548 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1549 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1550 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1551 ; CHECK-NEXT: add sp, sp, #112
; Insert the fixed-length call result into a scalable vector, then store it.
1553 %res = call <2 x i64> @get_v2i64()
1554 %vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %res, i64 0)
1555 store <vscale x 2 x i64> %vec, ptr %ptr
; The <8 x half> result of the non-streaming call @get_v8f16 must stay in a
; 128-bit q-register across the smstop/smstart pair instead of being coalesced
; into an SVE Z register: the spill/fill is a plain "str q0"/"ldr q0" with no
; 'mul vl' addressing.
1559 define void @dont_coalesce_res_v8f16(ptr %ptr) #0 {
1560 ; CHECK-LABEL: dont_coalesce_res_v8f16:
1562 ; CHECK-NEXT: sub sp, sp, #112
1563 ; CHECK-NEXT: cntd x9
1564 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1565 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1566 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1567 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1568 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1569 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1570 ; CHECK-NEXT: mov x19, x0
1571 ; CHECK-NEXT: smstop sm
1572 ; CHECK-NEXT: bl get_v8f16
1573 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
1574 ; CHECK-NEXT: smstart sm
1575 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1576 ; CHECK-NEXT: ptrue p0.h
1577 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1578 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1579 ; CHECK-NEXT: st1h { z0.h }, p0, [x19]
1580 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1581 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1582 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1583 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1584 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1585 ; CHECK-NEXT: add sp, sp, #112
; Insert the fixed-length call result into a scalable vector, then store it.
1587 %res = call <8 x half> @get_v8f16()
1588 %vec = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> %res, i64 0)
1589 store <vscale x 8 x half> %vec, ptr %ptr
; The <4 x float> result of the non-streaming call @get_v4f32 must stay in a
; 128-bit q-register across the smstop/smstart pair instead of being coalesced
; into an SVE Z register: the spill/fill is a plain "str q0"/"ldr q0" with no
; 'mul vl' addressing.
1593 define void @dont_coalesce_res_v4f32(ptr %ptr) #0 {
1594 ; CHECK-LABEL: dont_coalesce_res_v4f32:
1596 ; CHECK-NEXT: sub sp, sp, #112
1597 ; CHECK-NEXT: cntd x9
1598 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1599 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1600 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1601 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1602 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1603 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1604 ; CHECK-NEXT: mov x19, x0
1605 ; CHECK-NEXT: smstop sm
1606 ; CHECK-NEXT: bl get_v4f32
1607 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
1608 ; CHECK-NEXT: smstart sm
1609 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1610 ; CHECK-NEXT: ptrue p0.s
1611 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1612 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1613 ; CHECK-NEXT: st1w { z0.s }, p0, [x19]
1614 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1615 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1616 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1617 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1618 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1619 ; CHECK-NEXT: add sp, sp, #112
; Insert the fixed-length call result into a scalable vector, then store it.
1621 %res = call <4 x float> @get_v4f32()
1622 %vec = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %res, i64 0)
1623 store <vscale x 4 x float> %vec, ptr %ptr
; The <2 x double> result of the non-streaming call @get_v2f64 must stay in a
; 128-bit q-register across the smstop/smstart pair instead of being coalesced
; into an SVE Z register: the spill/fill is a plain "str q0"/"ldr q0" with no
; 'mul vl' addressing.
1627 define void @dont_coalesce_res_v2f64(ptr %ptr) #0 {
1628 ; CHECK-LABEL: dont_coalesce_res_v2f64:
1630 ; CHECK-NEXT: sub sp, sp, #112
1631 ; CHECK-NEXT: cntd x9
1632 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1633 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1634 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1635 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1636 ; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
1637 ; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
1638 ; CHECK-NEXT: mov x19, x0
1639 ; CHECK-NEXT: smstop sm
1640 ; CHECK-NEXT: bl get_v2f64
1641 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
1642 ; CHECK-NEXT: smstart sm
1643 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1644 ; CHECK-NEXT: ptrue p0.d
1645 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1646 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
1647 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
1648 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
1649 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
1650 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
1651 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
1652 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
1653 ; CHECK-NEXT: add sp, sp, #112
; Insert the fixed-length call result into a scalable vector, then store it.
1655 %res = call <2 x double> @get_v2f64()
1656 %vec = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> %res, i64 0)
1657 store <vscale x 2 x double> %vec, ptr %ptr
; External (non-streaming) helpers used by the tests above: value producers
; (@get_*), value consumers (@use_*), and the vector.insert intrinsics.
1661 declare half @get_f16()
1662 declare float @get_f32()
1663 declare double @get_f64()
1664 declare <1 x half> @get_v1f16()
1665 declare <1 x float> @get_v1f32()
1666 declare <1 x double> @get_v1f64()
1667 declare <8 x half> @get_v8f16()
1668 declare <4 x float> @get_v4f32()
1669 declare <2 x double> @get_v2f64()
1671 declare i8 @get_i8()
1672 declare i16 @get_i16()
1673 declare i32 @get_i32()
1674 declare i64 @get_i64()
1675 declare <1 x i8> @get_v1i8()
1676 declare <1 x i16> @get_v1i16()
1677 declare <1 x i32> @get_v1i32()
; FIX(review): was "declare <2 x i64> @get_v1i64()" — inconsistent with the
; other <1 x ...> getters above and a duplicate of @get_v2i64 below; the
; v1i64 test presumably calls this as <1 x i64> — confirm against its caller.
1678 declare <1 x i64> @get_v1i64()
1679 declare <16 x i8> @get_v16i8()
1680 declare <8 x i16> @get_v8i16()
1681 declare <4 x i32> @get_v4i32()
1682 declare <2 x i64> @get_v2i64()
1684 declare void @use_f16(half)
1685 declare void @use_f32(float)
1686 declare void @use_f64(double)
1687 declare void @use_v1f16(<1 x half>)
1688 declare void @use_v1f32(<1 x float>)
1689 declare void @use_v1f64(<1 x double>)
1690 declare void @use_v8f16(<8 x half>)
1691 declare void @use_v8bf16(<8 x bfloat>)
1692 declare void @use_v4f32(<4 x float>)
1693 declare void @use_v2f64(<2 x double>)
1695 declare void @use_i8(i8)
1696 declare void @use_i16(i16)
1697 declare void @use_i32(i32)
1698 declare void @use_i64(i64)
1699 declare void @use_v1i8(<1 x i8>)
1700 declare void @use_v1i16(<1 x i16>)
1701 declare void @use_v1i32(<1 x i32>)
1702 declare void @use_v1i64(<1 x i64>)
1703 declare void @use_v16i8(<16 x i8>)
1704 declare void @use_v8i16(<8 x i16>)
1705 declare void @use_v4i32(<4 x i32>)
1706 declare void @use_v2i64(<2 x i64>)
1707 declare void @use_v8i1(<8 x i1>)
1709 declare <vscale x 16 x i1> @llvm.vector.insert.nxv8i1.v8i1(<vscale x 16 x i1>, <8 x i1>, i64)
1710 declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
1711 declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
1712 declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
1713 declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
1714 declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
1715 declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
1716 declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
; #0: all test functions run with PSTATE.SM enabled (streaming mode), forcing
; mode changes around calls to the non-streaming helpers above.
1718 attributes #0 = { nounwind "aarch64_pstate_sm_enabled" "target-features"="+sve,+sme" }