1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
4 ;;; Test load instructions
7 ;;; We test load instructions using general stack, stack with dynamic
8 ;;; allocation, stack with dynamic allocation and alignment, and stack
9 ;;; with dynamic allocation, alignment, and spill.
11 ;;; First test using a stack for leaf function.
13 ;;; | | Higher address
14 ;;; |----------------------------------------------| <- old sp
15 ;;; | Local variables of fixed size |
16 ;;; |----------------------------------------------| <- sp
19 ;;; Access local variable using sp (%s11). In addition, please remember
20 ;;; that stack is aligned by 16 bytes.
22 ;;; Second test using a general stack.
24 ;;; | | Higher address
25 ;;; |----------------------------------------------|
26 ;;; | Parameter area for this function |
27 ;;; |----------------------------------------------|
28 ;;; | Register save area (RSA) for this function |
29 ;;; |----------------------------------------------|
30 ;;; | Return address for this function |
31 ;;; |----------------------------------------------|
32 ;;; | Frame pointer for this function |
33 ;;; |----------------------------------------------| <- fp(=old sp)
34 ;;; | Local variables of fixed size |
35 ;;; |----------------------------------------------|
36 ;;; |.variable-sized.local.variables.(VLAs)........|
37 ;;; |..............................................|
38 ;;; |..............................................|
39 ;;; |----------------------------------------------| <- returned by alloca
40 ;;; | Parameter area for callee |
41 ;;; |----------------------------------------------|
42 ;;; | Register save area (RSA) for callee |
43 ;;; |----------------------------------------------|
44 ;;; | Return address for callee |
45 ;;; |----------------------------------------------|
46 ;;; | Frame pointer for callee |
47 ;;; |----------------------------------------------| <- sp
50 ;;; Access local variables using fp (%s9) since the size of the VLA is not
51 ;;; known. At the beginning of the function, the prologue allocates 240 + data
52 ;;; bytes. 240 means RSA+RA+FP (=176) + Parameter (=64).
54 ;;; Third test using a general stack.
56 ;;; | | Higher address
57 ;;; |----------------------------------------------|
58 ;;; | Parameter area for this function |
59 ;;; |----------------------------------------------|
60 ;;; | Register save area (RSA) for this function |
61 ;;; |----------------------------------------------|
62 ;;; | Return address for this function |
63 ;;; |----------------------------------------------|
64 ;;; | Frame pointer for this function |
65 ;;; |----------------------------------------------| <- fp(=old sp)
66 ;;; |.empty.space.to.make.part.below.aligned.in....|
67 ;;; |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
68 ;;; |.alignment....................................| unknown at compile time)
69 ;;; |----------------------------------------------|
70 ;;; | Local variables of fixed size including spill|
72 ;;; |----------------------------------------------| <- bp(not defined by ABI,
73 ;;; |.variable-sized.local.variables.(VLAs)........| LLVM chooses SX17)
74 ;;; |..............................................| (size of this area is
75 ;;; |..............................................| unknown at compile time)
76 ;;; |----------------------------------------------| <- stack top (returned by
77 ;;; | Parameter area for callee | alloca)
78 ;;; |----------------------------------------------|
79 ;;; | Register save area (RSA) for callee |
80 ;;; |----------------------------------------------|
81 ;;; | Return address for callee |
82 ;;; |----------------------------------------------|
83 ;;; | Frame pointer for callee |
84 ;;; |----------------------------------------------| <- sp
87 ;;; Access local variables using bp (%s17) since the sizes of the alignment
88 ;;; padding and VLA are not known. At the beginning of the function, the prologue
89 ;;; allocates pad(240 + data + align) bytes. Then, data is accessed through bp + pad(240)
90 ;;; since this address doesn't change even if VLA is dynamically allocated.
92 ;;; Fourth test using a general stack with some spills.
;;; Volatile load of a <256 x i1> mask from a single fixed-size stack slot
;;; declared with 32-byte alignment. The expected code masks %s11 with (59)1
;;; in the prologue and reloads the mask into %vm1 as four 8-byte words
;;; (ld + lvm pairs) at offsets 192..216 from %s11.
95 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
96 define fastcc <256 x i1> @load__vm256_stk() {
97 ; CHECK-LABEL: load__vm256_stk:
99 ; CHECK-NEXT: st %s9, (, %s11)
100 ; CHECK-NEXT: st %s10, 8(, %s11)
101 ; CHECK-NEXT: or %s9, 0, %s11
102 ; CHECK-NEXT: lea %s11, -224(, %s11)
103 ; CHECK-NEXT: and %s11, %s11, (59)1
104 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2
105 ; CHECK-NEXT: # %bb.1:
106 ; CHECK-NEXT: ld %s61, 24(, %s14)
107 ; CHECK-NEXT: or %s62, 0, %s0
108 ; CHECK-NEXT: lea %s63, 315
109 ; CHECK-NEXT: shm.l %s63, (%s61)
110 ; CHECK-NEXT: shm.l %s8, 8(%s61)
111 ; CHECK-NEXT: shm.l %s11, 16(%s61)
113 ; CHECK-NEXT: or %s0, 0, %s62
114 ; CHECK-NEXT: .LBB0_2:
115 ; CHECK-NEXT: ld %s16, 192(, %s11)
116 ; CHECK-NEXT: lvm %vm1, 0, %s16
117 ; CHECK-NEXT: ld %s16, 200(, %s11)
118 ; CHECK-NEXT: lvm %vm1, 1, %s16
119 ; CHECK-NEXT: ld %s16, 208(, %s11)
120 ; CHECK-NEXT: lvm %vm1, 2, %s16
121 ; CHECK-NEXT: ld %s16, 216(, %s11)
122 ; CHECK-NEXT: lvm %vm1, 3, %s16
123 ; CHECK-NEXT: or %s11, 0, %s9
124 ; CHECK-NEXT: ld %s10, 8(, %s11)
125 ; CHECK-NEXT: ld %s9, (, %s11)
126 ; CHECK-NEXT: b.l.t (, %s10)
127 %1 = alloca <256 x i1>, align 32
128 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
129 %2 = load volatile <256 x i1>, ptr %1, align 32
130 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
134 ; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
135 declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
137 ; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
138 declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
;;; Volatile load of a <256 x i1> mask (32-byte aligned slot) in a frame that
;;; also holds a [268435428 x i64] array (2147483424 bytes). The expected code
;;; still adjusts %s11 with a single lea (-2147483648) and addresses the mask
;;; slot with plain displacements (2147483616..2147483640) from %s11.
;;; The loop volatile-loads each i64 element of the array.
140 ; Function Attrs: argmemonly nofree nounwind
141 define fastcc <256 x i1> @load__vm256_stk_big_fit() {
142 ; CHECK-LABEL: load__vm256_stk_big_fit:
144 ; CHECK-NEXT: st %s9, (, %s11)
145 ; CHECK-NEXT: st %s10, 8(, %s11)
146 ; CHECK-NEXT: or %s9, 0, %s11
147 ; CHECK-NEXT: lea %s11, -2147483648(, %s11)
148 ; CHECK-NEXT: and %s11, %s11, (59)1
149 ; CHECK-NEXT: brge.l %s11, %s8, .LBB1_4
150 ; CHECK-NEXT: # %bb.3:
151 ; CHECK-NEXT: ld %s61, 24(, %s14)
152 ; CHECK-NEXT: or %s62, 0, %s0
153 ; CHECK-NEXT: lea %s63, 315
154 ; CHECK-NEXT: shm.l %s63, (%s61)
155 ; CHECK-NEXT: shm.l %s8, 8(%s61)
156 ; CHECK-NEXT: shm.l %s11, 16(%s61)
158 ; CHECK-NEXT: or %s0, 0, %s62
159 ; CHECK-NEXT: .LBB1_4:
160 ; CHECK-NEXT: ld %s16, 2147483616(, %s11)
161 ; CHECK-NEXT: lvm %vm1, 0, %s16
162 ; CHECK-NEXT: ld %s16, 2147483624(, %s11)
163 ; CHECK-NEXT: lvm %vm1, 1, %s16
164 ; CHECK-NEXT: ld %s16, 2147483632(, %s11)
165 ; CHECK-NEXT: lvm %vm1, 2, %s16
166 ; CHECK-NEXT: ld %s16, 2147483640(, %s11)
167 ; CHECK-NEXT: lvm %vm1, 3, %s16
168 ; CHECK-NEXT: or %s0, 0, (0)1
169 ; CHECK-NEXT: lea %s1, 2147483424
170 ; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
171 ; CHECK-NEXT: ld %s2, 192(%s0, %s11)
172 ; CHECK-NEXT: lea %s0, 8(, %s0)
173 ; CHECK-NEXT: brne.l %s0, %s1, .LBB1_1
174 ; CHECK-NEXT: # %bb.2:
175 ; CHECK-NEXT: or %s11, 0, %s9
176 ; CHECK-NEXT: ld %s10, 8(, %s11)
177 ; CHECK-NEXT: ld %s9, (, %s11)
178 ; CHECK-NEXT: b.l.t (, %s10)
179 %1 = alloca <256 x i1>, align 32
180 %2 = alloca [268435428 x i64], align 8
181 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
182 call void @llvm.lifetime.start.p0(i64 2147483424, ptr nonnull %2)
183 %3 = load volatile <256 x i1>, ptr %1, align 32
187 call void @llvm.lifetime.end.p0(i64 2147483424, ptr nonnull %2)
188 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
192 %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
193 %7 = getelementptr inbounds [268435428 x i64], ptr %2, i64 0, i64 %6
194 %8 = load volatile i64, ptr %7, align 8, !tbaa !3
195 %9 = add nuw nsw i64 %6, 1
196 %10 = icmp eq i64 %9, 268435428
197 br i1 %10, label %4, label %5, !llvm.loop !7
;;; Volatile load of a <256 x i1> mask (32-byte aligned slot) in a frame that
;;; also holds a [268435429 x i64] array (2147483432 bytes), one element more
;;; than in load__vm256_stk_big_fit. Here the expected code builds both the
;;; stack adjustment and the mask slot address in %s13 with lea / and / lea.sl
;;; sequences, then loads the four mask words at offsets 0..24 from %s13.
;;; The loop volatile-loads each i64 element of the array.
200 ; Function Attrs: argmemonly nofree nounwind
201 define fastcc <256 x i1> @load__vm256_stk_big() {
202 ; CHECK-LABEL: load__vm256_stk_big:
204 ; CHECK-NEXT: st %s9, (, %s11)
205 ; CHECK-NEXT: st %s10, 8(, %s11)
206 ; CHECK-NEXT: or %s9, 0, %s11
207 ; CHECK-NEXT: lea %s13, 2147483616
208 ; CHECK-NEXT: and %s13, %s13, (32)0
209 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11)
210 ; CHECK-NEXT: and %s11, %s11, (59)1
211 ; CHECK-NEXT: brge.l %s11, %s8, .LBB2_4
212 ; CHECK-NEXT: # %bb.3:
213 ; CHECK-NEXT: ld %s61, 24(, %s14)
214 ; CHECK-NEXT: or %s62, 0, %s0
215 ; CHECK-NEXT: lea %s63, 315
216 ; CHECK-NEXT: shm.l %s63, (%s61)
217 ; CHECK-NEXT: shm.l %s8, 8(%s61)
218 ; CHECK-NEXT: shm.l %s11, 16(%s61)
220 ; CHECK-NEXT: or %s0, 0, %s62
221 ; CHECK-NEXT: .LBB2_4:
222 ; CHECK-NEXT: lea %s13, -2147483648
223 ; CHECK-NEXT: and %s13, %s13, (32)0
224 ; CHECK-NEXT: lea.sl %s13, (%s11, %s13)
225 ; CHECK-NEXT: ld %s16, (, %s13)
226 ; CHECK-NEXT: lvm %vm1, 0, %s16
227 ; CHECK-NEXT: ld %s16, 8(, %s13)
228 ; CHECK-NEXT: lvm %vm1, 1, %s16
229 ; CHECK-NEXT: ld %s16, 16(, %s13)
230 ; CHECK-NEXT: lvm %vm1, 2, %s16
231 ; CHECK-NEXT: ld %s16, 24(, %s13)
232 ; CHECK-NEXT: lvm %vm1, 3, %s16
233 ; CHECK-NEXT: or %s0, 0, (0)1
234 ; CHECK-NEXT: lea %s1, 2147483432
235 ; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
236 ; CHECK-NEXT: ld %s2, 216(%s0, %s11)
237 ; CHECK-NEXT: lea %s0, 8(, %s0)
238 ; CHECK-NEXT: brne.l %s0, %s1, .LBB2_1
239 ; CHECK-NEXT: # %bb.2:
240 ; CHECK-NEXT: or %s11, 0, %s9
241 ; CHECK-NEXT: ld %s10, 8(, %s11)
242 ; CHECK-NEXT: ld %s9, (, %s11)
243 ; CHECK-NEXT: b.l.t (, %s10)
244 %1 = alloca <256 x i1>, align 32
245 %2 = alloca [268435429 x i64], align 8
246 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
247 call void @llvm.lifetime.start.p0(i64 2147483432, ptr nonnull %2)
248 %3 = load volatile <256 x i1>, ptr %1, align 32
252 call void @llvm.lifetime.end.p0(i64 2147483432, ptr nonnull %2)
253 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
257 %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
258 %7 = getelementptr inbounds [268435429 x i64], ptr %2, i64 0, i64 %6
259 %8 = load volatile i64, ptr %7, align 8, !tbaa !3
260 %9 = add nuw nsw i64 %6, 1
261 %10 = icmp eq i64 %9, 268435429
262 br i1 %10, label %4, label %5, !llvm.loop !9
;;; Volatile load of a <256 x i1> mask (32-byte aligned slot) in a frame that
;;; also holds a [268435456 x i64] array (2147483648 bytes). The expected code
;;; builds the stack adjustment and the mask slot address in %s13 with
;;; lea / and / lea.sl sequences, and also materialises the loop bound in %s1
;;; with a lea / and pair. The loop volatile-loads each i64 array element.
265 ; Function Attrs: argmemonly nofree nounwind
266 define fastcc <256 x i1> @load__vm256_stk_big2() {
267 ; CHECK-LABEL: load__vm256_stk_big2:
269 ; CHECK-NEXT: st %s9, (, %s11)
270 ; CHECK-NEXT: st %s10, 8(, %s11)
271 ; CHECK-NEXT: or %s9, 0, %s11
272 ; CHECK-NEXT: lea %s13, 2147483424
273 ; CHECK-NEXT: and %s13, %s13, (32)0
274 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11)
275 ; CHECK-NEXT: and %s11, %s11, (59)1
276 ; CHECK-NEXT: brge.l %s11, %s8, .LBB3_4
277 ; CHECK-NEXT: # %bb.3:
278 ; CHECK-NEXT: ld %s61, 24(, %s14)
279 ; CHECK-NEXT: or %s62, 0, %s0
280 ; CHECK-NEXT: lea %s63, 315
281 ; CHECK-NEXT: shm.l %s63, (%s61)
282 ; CHECK-NEXT: shm.l %s8, 8(%s61)
283 ; CHECK-NEXT: shm.l %s11, 16(%s61)
285 ; CHECK-NEXT: or %s0, 0, %s62
286 ; CHECK-NEXT: .LBB3_4:
287 ; CHECK-NEXT: lea %s13, -2147483456
288 ; CHECK-NEXT: and %s13, %s13, (32)0
289 ; CHECK-NEXT: lea.sl %s13, (%s11, %s13)
290 ; CHECK-NEXT: ld %s16, (, %s13)
291 ; CHECK-NEXT: lvm %vm1, 0, %s16
292 ; CHECK-NEXT: ld %s16, 8(, %s13)
293 ; CHECK-NEXT: lvm %vm1, 1, %s16
294 ; CHECK-NEXT: ld %s16, 16(, %s13)
295 ; CHECK-NEXT: lvm %vm1, 2, %s16
296 ; CHECK-NEXT: ld %s16, 24(, %s13)
297 ; CHECK-NEXT: lvm %vm1, 3, %s16
298 ; CHECK-NEXT: or %s0, 0, (0)1
299 ; CHECK-NEXT: lea %s1, -2147483648
300 ; CHECK-NEXT: and %s1, %s1, (32)0
301 ; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
302 ; CHECK-NEXT: ld %s2, 192(%s0, %s11)
303 ; CHECK-NEXT: lea %s0, 8(, %s0)
304 ; CHECK-NEXT: brne.l %s0, %s1, .LBB3_1
305 ; CHECK-NEXT: # %bb.2:
306 ; CHECK-NEXT: or %s11, 0, %s9
307 ; CHECK-NEXT: ld %s10, 8(, %s11)
308 ; CHECK-NEXT: ld %s9, (, %s11)
309 ; CHECK-NEXT: b.l.t (, %s10)
310 %1 = alloca <256 x i1>, align 32
311 %2 = alloca [268435456 x i64], align 8
312 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
313 call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2)
314 %3 = load volatile <256 x i1>, ptr %1, align 32
318 call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2)
319 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
323 %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
324 %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6
325 %8 = load volatile i64, ptr %7, align 8, !tbaa !3
326 %9 = add nuw nsw i64 %6, 1
327 %10 = icmp eq i64 %9, 268435456
328 br i1 %10, label %4, label %5, !llvm.loop !10
;;; Volatile loads of <256 x i1> masks from a dynamically sized alloca
;;; (%0 elements) and from a fixed-size slot declared with 8-byte alignment.
;;; The expected code shifts %s0 left by 5 and calls __ve_grow_stack, reads the
;;; dynamic area starting at 240(%s11), and reloads the fixed slot into %vm1
;;; through %s9 at offsets -32..-8.
331 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
332 define fastcc <256 x i1> @load__vm256_stk_dyn(i64 noundef %0) {
333 ; CHECK-LABEL: load__vm256_stk_dyn:
335 ; CHECK-NEXT: st %s9, (, %s11)
336 ; CHECK-NEXT: st %s10, 8(, %s11)
337 ; CHECK-NEXT: or %s9, 0, %s11
338 ; CHECK-NEXT: lea %s11, -272(, %s11)
339 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2
340 ; CHECK-NEXT: # %bb.1:
341 ; CHECK-NEXT: ld %s61, 24(, %s14)
342 ; CHECK-NEXT: or %s62, 0, %s0
343 ; CHECK-NEXT: lea %s63, 315
344 ; CHECK-NEXT: shm.l %s63, (%s61)
345 ; CHECK-NEXT: shm.l %s8, 8(%s61)
346 ; CHECK-NEXT: shm.l %s11, 16(%s61)
348 ; CHECK-NEXT: or %s0, 0, %s62
349 ; CHECK-NEXT: .LBB4_2:
350 ; CHECK-NEXT: sll %s0, %s0, 5
351 ; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
352 ; CHECK-NEXT: and %s1, %s1, (32)0
353 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
354 ; CHECK-NEXT: bsic %s10, (, %s12)
355 ; CHECK-NEXT: lea %s0, 240(, %s11)
356 ; CHECK-NEXT: ld %s1, 24(, %s0)
357 ; CHECK-NEXT: ld %s1, 16(, %s0)
358 ; CHECK-NEXT: ld %s1, 8(, %s0)
359 ; CHECK-NEXT: ld %s0, (, %s0)
360 ; CHECK-NEXT: ld %s16, -32(, %s9)
361 ; CHECK-NEXT: lvm %vm1, 0, %s16
362 ; CHECK-NEXT: ld %s16, -24(, %s9)
363 ; CHECK-NEXT: lvm %vm1, 1, %s16
364 ; CHECK-NEXT: ld %s16, -16(, %s9)
365 ; CHECK-NEXT: lvm %vm1, 2, %s16
366 ; CHECK-NEXT: ld %s16, -8(, %s9)
367 ; CHECK-NEXT: lvm %vm1, 3, %s16
368 ; CHECK-NEXT: or %s11, 0, %s9
369 ; CHECK-NEXT: ld %s10, 8(, %s11)
370 ; CHECK-NEXT: ld %s9, (, %s11)
371 ; CHECK-NEXT: b.l.t (, %s10)
372 %2 = alloca <256 x i1>, align 8
373 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2)
374 %3 = alloca <256 x i1>, i64 %0, align 8
375 %4 = load volatile <256 x i1>, ptr %3, align 32
376 %5 = load volatile <256 x i1>, ptr %2, align 32
377 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2)
;;; Volatile loads of <256 x i1> masks from a dynamically sized alloca
;;; (%0 elements) and from a fixed-size slot declared with 32-byte alignment.
;;; The expected code saves/restores %s17 at 40(%s11), copies the realigned
;;; %s11 (masked with (59)1) into %s17, calls __ve_grow_stack, and reloads the
;;; fixed slot into %vm1 through %s17 at offsets 256..280.
381 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
382 define fastcc <256 x i1> @load__vm256_stk_dyn_align(i64 noundef %0) {
383 ; CHECK-LABEL: load__vm256_stk_dyn_align:
385 ; CHECK-NEXT: st %s9, (, %s11)
386 ; CHECK-NEXT: st %s10, 8(, %s11)
387 ; CHECK-NEXT: st %s17, 40(, %s11)
388 ; CHECK-NEXT: or %s9, 0, %s11
389 ; CHECK-NEXT: lea %s11, -288(, %s11)
390 ; CHECK-NEXT: and %s11, %s11, (59)1
391 ; CHECK-NEXT: or %s17, 0, %s11
392 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2
393 ; CHECK-NEXT: # %bb.1:
394 ; CHECK-NEXT: ld %s61, 24(, %s14)
395 ; CHECK-NEXT: or %s62, 0, %s0
396 ; CHECK-NEXT: lea %s63, 315
397 ; CHECK-NEXT: shm.l %s63, (%s61)
398 ; CHECK-NEXT: shm.l %s8, 8(%s61)
399 ; CHECK-NEXT: shm.l %s11, 16(%s61)
401 ; CHECK-NEXT: or %s0, 0, %s62
402 ; CHECK-NEXT: .LBB5_2:
403 ; CHECK-NEXT: sll %s0, %s0, 5
404 ; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
405 ; CHECK-NEXT: and %s1, %s1, (32)0
406 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
407 ; CHECK-NEXT: bsic %s10, (, %s12)
408 ; CHECK-NEXT: lea %s0, 240(, %s11)
409 ; CHECK-NEXT: ld %s1, 24(, %s0)
410 ; CHECK-NEXT: ld %s1, 16(, %s0)
411 ; CHECK-NEXT: ld %s1, 8(, %s0)
412 ; CHECK-NEXT: ld %s0, (, %s0)
413 ; CHECK-NEXT: ld %s16, 256(, %s17)
414 ; CHECK-NEXT: lvm %vm1, 0, %s16
415 ; CHECK-NEXT: ld %s16, 264(, %s17)
416 ; CHECK-NEXT: lvm %vm1, 1, %s16
417 ; CHECK-NEXT: ld %s16, 272(, %s17)
418 ; CHECK-NEXT: lvm %vm1, 2, %s16
419 ; CHECK-NEXT: ld %s16, 280(, %s17)
420 ; CHECK-NEXT: lvm %vm1, 3, %s16
421 ; CHECK-NEXT: or %s11, 0, %s9
422 ; CHECK-NEXT: ld %s17, 40(, %s11)
423 ; CHECK-NEXT: ld %s10, 8(, %s11)
424 ; CHECK-NEXT: ld %s9, (, %s11)
425 ; CHECK-NEXT: b.l.t (, %s10)
426 %2 = alloca <256 x i1>, align 32
427 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2)
428 %3 = alloca <256 x i1>, i64 %0, align 8
429 %4 = load volatile <256 x i1>, ptr %3, align 32
430 %5 = load volatile <256 x i1>, ptr %2, align 32
431 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2)
;;; Volatile loads of <256 x i1> masks from a dynamically sized alloca
;;; (%0 elements) and from two fixed-size slots, one declared with 32-byte
;;; and one with 64-byte alignment. The expected code masks %s11 with (58)1,
;;; keeps the result in %s17, and reloads the fixed slots through %s17:
;;; offsets 288..312 into %vm1 and offsets 256..280 into %vm2.
435 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
436 define fastcc <256 x i1> @load__vm256_stk_dyn_align2(i64 noundef %0) {
437 ; CHECK-LABEL: load__vm256_stk_dyn_align2:
439 ; CHECK-NEXT: st %s9, (, %s11)
440 ; CHECK-NEXT: st %s10, 8(, %s11)
441 ; CHECK-NEXT: st %s17, 40(, %s11)
442 ; CHECK-NEXT: or %s9, 0, %s11
443 ; CHECK-NEXT: lea %s11, -320(, %s11)
444 ; CHECK-NEXT: and %s11, %s11, (58)1
445 ; CHECK-NEXT: or %s17, 0, %s11
446 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB6_2
447 ; CHECK-NEXT: # %bb.1:
448 ; CHECK-NEXT: ld %s61, 24(, %s14)
449 ; CHECK-NEXT: or %s62, 0, %s0
450 ; CHECK-NEXT: lea %s63, 315
451 ; CHECK-NEXT: shm.l %s63, (%s61)
452 ; CHECK-NEXT: shm.l %s8, 8(%s61)
453 ; CHECK-NEXT: shm.l %s11, 16(%s61)
455 ; CHECK-NEXT: or %s0, 0, %s62
456 ; CHECK-NEXT: .LBB6_2:
457 ; CHECK-NEXT: sll %s0, %s0, 5
458 ; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
459 ; CHECK-NEXT: and %s1, %s1, (32)0
460 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
461 ; CHECK-NEXT: bsic %s10, (, %s12)
462 ; CHECK-NEXT: lea %s0, 240(, %s11)
463 ; CHECK-NEXT: ld %s1, 24(, %s0)
464 ; CHECK-NEXT: ld %s1, 16(, %s0)
465 ; CHECK-NEXT: ld %s1, 8(, %s0)
466 ; CHECK-NEXT: ld %s0, (, %s0)
467 ; CHECK-NEXT: ld %s16, 288(, %s17)
468 ; CHECK-NEXT: lvm %vm1, 0, %s16
469 ; CHECK-NEXT: ld %s16, 296(, %s17)
470 ; CHECK-NEXT: lvm %vm1, 1, %s16
471 ; CHECK-NEXT: ld %s16, 304(, %s17)
472 ; CHECK-NEXT: lvm %vm1, 2, %s16
473 ; CHECK-NEXT: ld %s16, 312(, %s17)
474 ; CHECK-NEXT: lvm %vm1, 3, %s16
475 ; CHECK-NEXT: ld %s16, 256(, %s17)
476 ; CHECK-NEXT: lvm %vm2, 0, %s16
477 ; CHECK-NEXT: ld %s16, 264(, %s17)
478 ; CHECK-NEXT: lvm %vm2, 1, %s16
479 ; CHECK-NEXT: ld %s16, 272(, %s17)
480 ; CHECK-NEXT: lvm %vm2, 2, %s16
481 ; CHECK-NEXT: ld %s16, 280(, %s17)
482 ; CHECK-NEXT: lvm %vm2, 3, %s16
483 ; CHECK-NEXT: or %s11, 0, %s9
484 ; CHECK-NEXT: ld %s17, 40(, %s11)
485 ; CHECK-NEXT: ld %s10, 8(, %s11)
486 ; CHECK-NEXT: ld %s9, (, %s11)
487 ; CHECK-NEXT: b.l.t (, %s10)
488 %2 = alloca <256 x i1>, align 32
489 %3 = alloca <256 x i1>, align 64
490 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2)
491 %4 = alloca <256 x i1>, i64 %0, align 8
492 %5 = load volatile <256 x i1>, ptr %4, align 32
493 %6 = load volatile <256 x i1>, ptr %2, align 32
494 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
495 %7 = load volatile <256 x i1>, ptr %3, align 64
496 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
497 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2)
;;; Volatile loads of <256 x i1> masks from a dynamically sized i8 alloca
;;; (%0 bytes) and from a fixed-size slot declared with 32-byte alignment,
;;; followed by calls to @dummy and @pass(%0). The expected code stores %vm1
;;; to 256..280(%s17) with svm + st pairs before the calls ("32-byte Folded
;;; Spill") and reloads it afterwards, and keeps %s18 spilled at 48(%s9)
;;; across the body.
501 ; Function Attrs: nounwind
502 define fastcc <256 x i1> @load__vm256_stk_dyn_align_spill(i64 noundef %0) {
503 ; CHECK-LABEL: load__vm256_stk_dyn_align_spill:
505 ; CHECK-NEXT: st %s9, (, %s11)
506 ; CHECK-NEXT: st %s10, 8(, %s11)
507 ; CHECK-NEXT: st %s17, 40(, %s11)
508 ; CHECK-NEXT: or %s9, 0, %s11
509 ; CHECK-NEXT: lea %s11, -320(, %s11)
510 ; CHECK-NEXT: and %s11, %s11, (59)1
511 ; CHECK-NEXT: or %s17, 0, %s11
512 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB7_2
513 ; CHECK-NEXT: # %bb.1:
514 ; CHECK-NEXT: ld %s61, 24(, %s14)
515 ; CHECK-NEXT: or %s62, 0, %s0
516 ; CHECK-NEXT: lea %s63, 315
517 ; CHECK-NEXT: shm.l %s63, (%s61)
518 ; CHECK-NEXT: shm.l %s8, 8(%s61)
519 ; CHECK-NEXT: shm.l %s11, 16(%s61)
521 ; CHECK-NEXT: or %s0, 0, %s62
522 ; CHECK-NEXT: .LBB7_2:
523 ; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill
524 ; CHECK-NEXT: or %s18, 0, %s0
525 ; CHECK-NEXT: lea %s0, 15(, %s0)
526 ; CHECK-NEXT: and %s0, -16, %s0
527 ; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
528 ; CHECK-NEXT: and %s1, %s1, (32)0
529 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
530 ; CHECK-NEXT: bsic %s10, (, %s12)
531 ; CHECK-NEXT: lea %s0, 240(, %s11)
532 ; CHECK-NEXT: ld %s1, 24(, %s0)
533 ; CHECK-NEXT: ld %s1, 16(, %s0)
534 ; CHECK-NEXT: ld %s1, 8(, %s0)
535 ; CHECK-NEXT: ld %s0, (, %s0)
536 ; CHECK-NEXT: ld %s16, 288(, %s17)
537 ; CHECK-NEXT: lvm %vm1, 0, %s16
538 ; CHECK-NEXT: ld %s16, 296(, %s17)
539 ; CHECK-NEXT: lvm %vm1, 1, %s16
540 ; CHECK-NEXT: ld %s16, 304(, %s17)
541 ; CHECK-NEXT: lvm %vm1, 2, %s16
542 ; CHECK-NEXT: ld %s16, 312(, %s17)
543 ; CHECK-NEXT: lvm %vm1, 3, %s16
544 ; CHECK-NEXT: svm %s16, %vm1, 0
545 ; CHECK-NEXT: st %s16, 256(, %s17)
546 ; CHECK-NEXT: svm %s16, %vm1, 1
547 ; CHECK-NEXT: st %s16, 264(, %s17)
548 ; CHECK-NEXT: svm %s16, %vm1, 2
549 ; CHECK-NEXT: st %s16, 272(, %s17)
550 ; CHECK-NEXT: svm %s16, %vm1, 3
551 ; CHECK-NEXT: st %s16, 280(, %s17) # 32-byte Folded Spill
552 ; CHECK-NEXT: lea %s0, dummy@lo
553 ; CHECK-NEXT: and %s0, %s0, (32)0
554 ; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0)
555 ; CHECK-NEXT: bsic %s10, (, %s12)
556 ; CHECK-NEXT: lea %s0, pass@lo
557 ; CHECK-NEXT: and %s0, %s0, (32)0
558 ; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0)
559 ; CHECK-NEXT: or %s0, 0, %s18
560 ; CHECK-NEXT: bsic %s10, (, %s12)
561 ; CHECK-NEXT: ld %s16, 256(, %s17)
562 ; CHECK-NEXT: lvm %vm1, 0, %s16
563 ; CHECK-NEXT: ld %s16, 264(, %s17)
564 ; CHECK-NEXT: lvm %vm1, 1, %s16
565 ; CHECK-NEXT: ld %s16, 272(, %s17)
566 ; CHECK-NEXT: lvm %vm1, 2, %s16
567 ; CHECK-NEXT: ld %s16, 280(, %s17) # 32-byte Folded Reload
568 ; CHECK-NEXT: lvm %vm1, 3, %s16
569 ; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload
570 ; CHECK-NEXT: or %s11, 0, %s9
571 ; CHECK-NEXT: ld %s17, 40(, %s11)
572 ; CHECK-NEXT: ld %s10, 8(, %s11)
573 ; CHECK-NEXT: ld %s9, (, %s11)
574 ; CHECK-NEXT: b.l.t (, %s10)
575 %2 = alloca <256 x i1>, align 32
576 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2)
577 %3 = alloca i8, i64 %0, align 8
578 %4 = load volatile <256 x i1>, ptr %3, align 32
579 %5 = load volatile <256 x i1>, ptr %2, align 32
580 tail call fastcc void @dummy()
581 tail call fastcc void @pass(i64 noundef %0)
582 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2)
586 declare fastcc void @dummy()
588 declare fastcc void @pass(i64 noundef)
;;; Volatile load of a <512 x i1> mask from a single fixed-size stack slot
;;; declared with 64-byte alignment. The expected code masks %s11 with (58)1
;;; and, after an "implicit-def: $vmp1" marker, reloads the mask as eight
;;; 8-byte words: offsets 192..216 from %s11 into %vm3 and offsets 224..248
;;; into %vm2.
590 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
591 define fastcc <512 x i1> @load__vm512_stk() {
592 ; CHECK-LABEL: load__vm512_stk:
594 ; CHECK-NEXT: st %s9, (, %s11)
595 ; CHECK-NEXT: st %s10, 8(, %s11)
596 ; CHECK-NEXT: or %s9, 0, %s11
597 ; CHECK-NEXT: lea %s11, -256(, %s11)
598 ; CHECK-NEXT: and %s11, %s11, (58)1
599 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB8_2
600 ; CHECK-NEXT: # %bb.1:
601 ; CHECK-NEXT: ld %s61, 24(, %s14)
602 ; CHECK-NEXT: or %s62, 0, %s0
603 ; CHECK-NEXT: lea %s63, 315
604 ; CHECK-NEXT: shm.l %s63, (%s61)
605 ; CHECK-NEXT: shm.l %s8, 8(%s61)
606 ; CHECK-NEXT: shm.l %s11, 16(%s61)
608 ; CHECK-NEXT: or %s0, 0, %s62
609 ; CHECK-NEXT: .LBB8_2:
610 ; CHECK-NEXT: # implicit-def: $vmp1
611 ; CHECK-NEXT: ld %s16, 192(, %s11)
612 ; CHECK-NEXT: lvm %vm3, 0, %s16
613 ; CHECK-NEXT: ld %s16, 200(, %s11)
614 ; CHECK-NEXT: lvm %vm3, 1, %s16
615 ; CHECK-NEXT: ld %s16, 208(, %s11)
616 ; CHECK-NEXT: lvm %vm3, 2, %s16
617 ; CHECK-NEXT: ld %s16, 216(, %s11)
618 ; CHECK-NEXT: lvm %vm3, 3, %s16
619 ; CHECK-NEXT: ld %s16, 224(, %s11)
620 ; CHECK-NEXT: lvm %vm2, 0, %s16
621 ; CHECK-NEXT: ld %s16, 232(, %s11)
622 ; CHECK-NEXT: lvm %vm2, 1, %s16
623 ; CHECK-NEXT: ld %s16, 240(, %s11)
624 ; CHECK-NEXT: lvm %vm2, 2, %s16
625 ; CHECK-NEXT: ld %s16, 248(, %s11)
626 ; CHECK-NEXT: lvm %vm2, 3, %s16
627 ; CHECK-NEXT: or %s11, 0, %s9
628 ; CHECK-NEXT: ld %s10, 8(, %s11)
629 ; CHECK-NEXT: ld %s9, (, %s11)
630 ; CHECK-NEXT: b.l.t (, %s10)
631 %1 = alloca <512 x i1>, align 64
632 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1)
633 %2 = load volatile <512 x i1>, ptr %1, align 64
634 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1)
;;; Volatile load of a <512 x i1> mask (64-byte aligned slot) in a frame that
;;; also holds a [268435424 x i64] array (2147483392 bytes). The expected code
;;; still adjusts %s11 with a single lea (-2147483648) and addresses the mask
;;; slot with plain displacements (2147483584..2147483640) from %s11, filling
;;; %vm3 and then %vm2. The loop volatile-loads each i64 array element.
638 ; Function Attrs: argmemonly nofree nounwind
639 define fastcc <512 x i1> @load__vm512_stk_big_fit() {
640 ; CHECK-LABEL: load__vm512_stk_big_fit:
642 ; CHECK-NEXT: st %s9, (, %s11)
643 ; CHECK-NEXT: st %s10, 8(, %s11)
644 ; CHECK-NEXT: or %s9, 0, %s11
645 ; CHECK-NEXT: lea %s11, -2147483648(, %s11)
646 ; CHECK-NEXT: and %s11, %s11, (58)1
647 ; CHECK-NEXT: brge.l %s11, %s8, .LBB9_4
648 ; CHECK-NEXT: # %bb.3:
649 ; CHECK-NEXT: ld %s61, 24(, %s14)
650 ; CHECK-NEXT: or %s62, 0, %s0
651 ; CHECK-NEXT: lea %s63, 315
652 ; CHECK-NEXT: shm.l %s63, (%s61)
653 ; CHECK-NEXT: shm.l %s8, 8(%s61)
654 ; CHECK-NEXT: shm.l %s11, 16(%s61)
656 ; CHECK-NEXT: or %s0, 0, %s62
657 ; CHECK-NEXT: .LBB9_4:
658 ; CHECK-NEXT: # implicit-def: $vmp1
659 ; CHECK-NEXT: ld %s16, 2147483584(, %s11)
660 ; CHECK-NEXT: lvm %vm3, 0, %s16
661 ; CHECK-NEXT: ld %s16, 2147483592(, %s11)
662 ; CHECK-NEXT: lvm %vm3, 1, %s16
663 ; CHECK-NEXT: ld %s16, 2147483600(, %s11)
664 ; CHECK-NEXT: lvm %vm3, 2, %s16
665 ; CHECK-NEXT: ld %s16, 2147483608(, %s11)
666 ; CHECK-NEXT: lvm %vm3, 3, %s16
667 ; CHECK-NEXT: ld %s16, 2147483616(, %s11)
668 ; CHECK-NEXT: lvm %vm2, 0, %s16
669 ; CHECK-NEXT: ld %s16, 2147483624(, %s11)
670 ; CHECK-NEXT: lvm %vm2, 1, %s16
671 ; CHECK-NEXT: ld %s16, 2147483632(, %s11)
672 ; CHECK-NEXT: lvm %vm2, 2, %s16
673 ; CHECK-NEXT: ld %s16, 2147483640(, %s11)
674 ; CHECK-NEXT: lvm %vm2, 3, %s16
675 ; CHECK-NEXT: or %s0, 0, (0)1
676 ; CHECK-NEXT: lea %s1, 2147483392
677 ; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
678 ; CHECK-NEXT: ld %s2, 192(%s0, %s11)
679 ; CHECK-NEXT: lea %s0, 8(, %s0)
680 ; CHECK-NEXT: brne.l %s0, %s1, .LBB9_1
681 ; CHECK-NEXT: # %bb.2:
682 ; CHECK-NEXT: or %s11, 0, %s9
683 ; CHECK-NEXT: ld %s10, 8(, %s11)
684 ; CHECK-NEXT: ld %s9, (, %s11)
685 ; CHECK-NEXT: b.l.t (, %s10)
686 %1 = alloca <512 x i1>, align 64
687 %2 = alloca [268435424 x i64], align 8
688 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1)
689 call void @llvm.lifetime.start.p0(i64 2147483392, ptr nonnull %2)
690 %3 = load volatile <512 x i1>, ptr %1, align 64
694 call void @llvm.lifetime.end.p0(i64 2147483392, ptr nonnull %2)
695 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1)
699 %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
700 %7 = getelementptr inbounds [268435424 x i64], ptr %2, i64 0, i64 %6
701 %8 = load volatile i64, ptr %7, align 8, !tbaa !3
702 %9 = add nuw nsw i64 %6, 1
703 %10 = icmp eq i64 %9, 268435424
704 br i1 %10, label %4, label %5, !llvm.loop !11
;;; Volatile load of a <512 x i1> mask (64-byte aligned slot) in a frame that
;;; also holds a [268435425 x i64] array (2147483400 bytes), one element more
;;; than in load__vm512_stk_big_fit. Here the expected code builds both the
;;; stack adjustment and the mask slot address in %s13 with lea / and / lea.sl
;;; sequences, then loads eight mask words at offsets 0..56 from %s13 into
;;; %vm3 and %vm2. The loop volatile-loads each i64 array element.
707 ; Function Attrs: argmemonly nofree nounwind
708 define fastcc <512 x i1> @load__vm512_stk_big() {
709 ; CHECK-LABEL: load__vm512_stk_big:
711 ; CHECK-NEXT: st %s9, (, %s11)
712 ; CHECK-NEXT: st %s10, 8(, %s11)
713 ; CHECK-NEXT: or %s9, 0, %s11
714 ; CHECK-NEXT: lea %s13, 2147483584
715 ; CHECK-NEXT: and %s13, %s13, (32)0
716 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11)
717 ; CHECK-NEXT: and %s11, %s11, (58)1
718 ; CHECK-NEXT: brge.l %s11, %s8, .LBB10_4
719 ; CHECK-NEXT: # %bb.3:
720 ; CHECK-NEXT: ld %s61, 24(, %s14)
721 ; CHECK-NEXT: or %s62, 0, %s0
722 ; CHECK-NEXT: lea %s63, 315
723 ; CHECK-NEXT: shm.l %s63, (%s61)
724 ; CHECK-NEXT: shm.l %s8, 8(%s61)
725 ; CHECK-NEXT: shm.l %s11, 16(%s61)
727 ; CHECK-NEXT: or %s0, 0, %s62
728 ; CHECK-NEXT: .LBB10_4:
729 ; CHECK-NEXT: lea %s13, -2147483648
730 ; CHECK-NEXT: and %s13, %s13, (32)0
731 ; CHECK-NEXT: lea.sl %s13, (%s11, %s13)
732 ; CHECK-NEXT: # implicit-def: $vmp1
733 ; CHECK-NEXT: ld %s16, (, %s13)
734 ; CHECK-NEXT: lvm %vm3, 0, %s16
735 ; CHECK-NEXT: ld %s16, 8(, %s13)
736 ; CHECK-NEXT: lvm %vm3, 1, %s16
737 ; CHECK-NEXT: ld %s16, 16(, %s13)
738 ; CHECK-NEXT: lvm %vm3, 2, %s16
739 ; CHECK-NEXT: ld %s16, 24(, %s13)
740 ; CHECK-NEXT: lvm %vm3, 3, %s16
741 ; CHECK-NEXT: ld %s16, 32(, %s13)
742 ; CHECK-NEXT: lvm %vm2, 0, %s16
743 ; CHECK-NEXT: ld %s16, 40(, %s13)
744 ; CHECK-NEXT: lvm %vm2, 1, %s16
745 ; CHECK-NEXT: ld %s16, 48(, %s13)
746 ; CHECK-NEXT: lvm %vm2, 2, %s16
747 ; CHECK-NEXT: ld %s16, 56(, %s13)
748 ; CHECK-NEXT: lvm %vm2, 3, %s16
749 ; CHECK-NEXT: or %s0, 0, (0)1
750 ; CHECK-NEXT: lea %s1, 2147483400
751 ; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
752 ; CHECK-NEXT: ld %s2, 248(%s0, %s11)
753 ; CHECK-NEXT: lea %s0, 8(, %s0)
754 ; CHECK-NEXT: brne.l %s0, %s1, .LBB10_1
755 ; CHECK-NEXT: # %bb.2:
756 ; CHECK-NEXT: or %s11, 0, %s9
757 ; CHECK-NEXT: ld %s10, 8(, %s11)
758 ; CHECK-NEXT: ld %s9, (, %s11)
759 ; CHECK-NEXT: b.l.t (, %s10)
760 %1 = alloca <512 x i1>, align 64
761 %2 = alloca [268435425 x i64], align 8
762 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1)
763 call void @llvm.lifetime.start.p0(i64 2147483400, ptr nonnull %2)
764 %3 = load volatile <512 x i1>, ptr %1, align 64
768 call void @llvm.lifetime.end.p0(i64 2147483400, ptr nonnull %2)
769 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1)
773 %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
774 %7 = getelementptr inbounds [268435425 x i64], ptr %2, i64 0, i64 %6
775 %8 = load volatile i64, ptr %7, align 8, !tbaa !3
776 %9 = add nuw nsw i64 %6, 1
777 %10 = icmp eq i64 %9, 268435425
778 br i1 %10, label %4, label %5, !llvm.loop !12
;;; Volatile load of a <512 x i1> mask (64-byte aligned slot) in a frame that
;;; also holds a [268435456 x i64] array (2147483648 bytes). The expected code
;;; builds the stack adjustment and the mask slot address in %s13 with
;;; lea / and / lea.sl sequences, loads eight mask words at offsets 0..56 from
;;; %s13 into %vm3 and %vm2, and materialises the loop bound in %s1 with a
;;; lea / and pair. The loop volatile-loads each i64 array element.
781 ; Function Attrs: argmemonly nofree nounwind
782 define fastcc <512 x i1> @load__vm512_stk_big2() {
783 ; CHECK-LABEL: load__vm512_stk_big2:
785 ; CHECK-NEXT: st %s9, (, %s11)
786 ; CHECK-NEXT: st %s10, 8(, %s11)
787 ; CHECK-NEXT: or %s9, 0, %s11
788 ; CHECK-NEXT: lea %s13, 2147483392
789 ; CHECK-NEXT: and %s13, %s13, (32)0
790 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11)
791 ; CHECK-NEXT: and %s11, %s11, (58)1
792 ; CHECK-NEXT: brge.l %s11, %s8, .LBB11_4
793 ; CHECK-NEXT: # %bb.3:
794 ; CHECK-NEXT: ld %s61, 24(, %s14)
795 ; CHECK-NEXT: or %s62, 0, %s0
796 ; CHECK-NEXT: lea %s63, 315
797 ; CHECK-NEXT: shm.l %s63, (%s61)
798 ; CHECK-NEXT: shm.l %s8, 8(%s61)
799 ; CHECK-NEXT: shm.l %s11, 16(%s61)
801 ; CHECK-NEXT: or %s0, 0, %s62
802 ; CHECK-NEXT: .LBB11_4:
803 ; CHECK-NEXT: lea %s13, -2147483456
804 ; CHECK-NEXT: and %s13, %s13, (32)0
805 ; CHECK-NEXT: lea.sl %s13, (%s11, %s13)
806 ; CHECK-NEXT: # implicit-def: $vmp1
807 ; CHECK-NEXT: ld %s16, (, %s13)
808 ; CHECK-NEXT: lvm %vm3, 0, %s16
809 ; CHECK-NEXT: ld %s16, 8(, %s13)
810 ; CHECK-NEXT: lvm %vm3, 1, %s16
811 ; CHECK-NEXT: ld %s16, 16(, %s13)
812 ; CHECK-NEXT: lvm %vm3, 2, %s16
813 ; CHECK-NEXT: ld %s16, 24(, %s13)
814 ; CHECK-NEXT: lvm %vm3, 3, %s16
815 ; CHECK-NEXT: ld %s16, 32(, %s13)
816 ; CHECK-NEXT: lvm %vm2, 0, %s16
817 ; CHECK-NEXT: ld %s16, 40(, %s13)
818 ; CHECK-NEXT: lvm %vm2, 1, %s16
819 ; CHECK-NEXT: ld %s16, 48(, %s13)
820 ; CHECK-NEXT: lvm %vm2, 2, %s16
821 ; CHECK-NEXT: ld %s16, 56(, %s13)
822 ; CHECK-NEXT: lvm %vm2, 3, %s16
823 ; CHECK-NEXT: or %s0, 0, (0)1
824 ; CHECK-NEXT: lea %s1, -2147483648
825 ; CHECK-NEXT: and %s1, %s1, (32)0
826 ; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
827 ; CHECK-NEXT: ld %s2, 192(%s0, %s11)
828 ; CHECK-NEXT: lea %s0, 8(, %s0)
829 ; CHECK-NEXT: brne.l %s0, %s1, .LBB11_1
830 ; CHECK-NEXT: # %bb.2:
831 ; CHECK-NEXT: or %s11, 0, %s9
832 ; CHECK-NEXT: ld %s10, 8(, %s11)
833 ; CHECK-NEXT: ld %s9, (, %s11)
834 ; CHECK-NEXT: b.l.t (, %s10)
835 %1 = alloca <512 x i1>, align 64
836 %2 = alloca [268435456 x i64], align 8
837 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1)
838 call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2)
839 %3 = load volatile <512 x i1>, ptr %1, align 64
843 call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2)
844 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1)
848 %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
849 %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6
850 %8 = load volatile i64, ptr %7, align 8, !tbaa !3
851 %9 = add nuw nsw i64 %6, 1
852 %10 = icmp eq i64 %9, 268435456
853 br i1 %10, label %4, label %5, !llvm.loop !13
856 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
;;; load__vm512_stk_dyn: volatile <512 x i1> loads from a dynamically sized
;;; alloca (%0 elements) and from one fixed alloca declared with align 64.
;;;
;;; What the expected assembly below shows:
;;;  - the prologue keeps the old sp in %s9, reserves 320 bytes, masks sp
;;;    with (58)1 (clears the low 6 bits, i.e. 64-byte alignment) and copies
;;;    the aligned sp into %s17;
;;;  - if the new sp is not >= %s8, the fall-through block %bb.1 runs before
;;;    execution continues at .LBB12_2;
;;;  - %s0 is shifted left by 6 (x64, the byte size of <512 x i1>) before
;;;    the call to __ve_grow_stack;
;;;  - the dynamic area is read as eight 8-byte words starting at
;;;    240(, %s11); those values are only loaded into %s1/%s0;
;;;  - the fixed slot is read through %s17 at offsets 256..312 and moved
;;;    into %vm3/%vm2 with lvm;
;;;  - the epilogue restores sp from %s9 and reloads %s17, %s10 and %s9.
857 define fastcc <512 x i1> @load__vm512_stk_dyn(i64 noundef %0) {
858 ; CHECK-LABEL: load__vm512_stk_dyn:
860 ; CHECK-NEXT: st %s9, (, %s11)
861 ; CHECK-NEXT: st %s10, 8(, %s11)
862 ; CHECK-NEXT: st %s17, 40(, %s11)
863 ; CHECK-NEXT: or %s9, 0, %s11
864 ; CHECK-NEXT: lea %s11, -320(, %s11)
865 ; CHECK-NEXT: and %s11, %s11, (58)1
866 ; CHECK-NEXT: or %s17, 0, %s11
867 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB12_2
868 ; CHECK-NEXT: # %bb.1:
869 ; CHECK-NEXT: ld %s61, 24(, %s14)
870 ; CHECK-NEXT: or %s62, 0, %s0
871 ; CHECK-NEXT: lea %s63, 315
872 ; CHECK-NEXT: shm.l %s63, (%s61)
873 ; CHECK-NEXT: shm.l %s8, 8(%s61)
874 ; CHECK-NEXT: shm.l %s11, 16(%s61)
876 ; CHECK-NEXT: or %s0, 0, %s62
877 ; CHECK-NEXT: .LBB12_2:
878 ; CHECK-NEXT: sll %s0, %s0, 6
879 ; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
880 ; CHECK-NEXT: and %s1, %s1, (32)0
881 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
882 ; CHECK-NEXT: bsic %s10, (, %s12)
883 ; CHECK-NEXT: lea %s0, 240(, %s11)
884 ; CHECK-NEXT: ld %s1, 56(, %s0)
885 ; CHECK-NEXT: ld %s1, 48(, %s0)
886 ; CHECK-NEXT: ld %s1, 40(, %s0)
887 ; CHECK-NEXT: ld %s1, 32(, %s0)
888 ; CHECK-NEXT: ld %s1, 24(, %s0)
889 ; CHECK-NEXT: ld %s1, 16(, %s0)
890 ; CHECK-NEXT: ld %s1, 8(, %s0)
891 ; CHECK-NEXT: ld %s0, (, %s0)
892 ; CHECK-NEXT: # implicit-def: $vmp1
893 ; CHECK-NEXT: ld %s16, 256(, %s17)
894 ; CHECK-NEXT: lvm %vm3, 0, %s16
895 ; CHECK-NEXT: ld %s16, 264(, %s17)
896 ; CHECK-NEXT: lvm %vm3, 1, %s16
897 ; CHECK-NEXT: ld %s16, 272(, %s17)
898 ; CHECK-NEXT: lvm %vm3, 2, %s16
899 ; CHECK-NEXT: ld %s16, 280(, %s17)
900 ; CHECK-NEXT: lvm %vm3, 3, %s16
901 ; CHECK-NEXT: ld %s16, 288(, %s17)
902 ; CHECK-NEXT: lvm %vm2, 0, %s16
903 ; CHECK-NEXT: ld %s16, 296(, %s17)
904 ; CHECK-NEXT: lvm %vm2, 1, %s16
905 ; CHECK-NEXT: ld %s16, 304(, %s17)
906 ; CHECK-NEXT: lvm %vm2, 2, %s16
907 ; CHECK-NEXT: ld %s16, 312(, %s17)
908 ; CHECK-NEXT: lvm %vm2, 3, %s16
909 ; CHECK-NEXT: or %s11, 0, %s9
910 ; CHECK-NEXT: ld %s17, 40(, %s11)
911 ; CHECK-NEXT: ld %s10, 8(, %s11)
912 ; CHECK-NEXT: ld %s9, (, %s11)
913 ; CHECK-NEXT: b.l.t (, %s10)
;;; %2 is the fixed 64-byte slot; %3 is the variable-sized area holding %0
;;; elements of <512 x i1>.
;;; NOTE(review): %3 is allocated with align 8 but loaded with align 64 --
;;; confirm this mismatch is intended.
914 %2 = alloca <512 x i1>, align 64
915 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
916 %3 = alloca <512 x i1>, i64 %0, align 8
917 %4 = load volatile <512 x i1>, ptr %3, align 64
918 %5 = load volatile <512 x i1>, ptr %2, align 64
919 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
923 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
;;; load__vm512_stk_dyn_align: volatile <512 x i1> loads from a dynamically
;;; sized alloca (%0 elements) and from one fixed alloca declared with
;;; align 32.
;;;
;;; What the expected assembly below shows:
;;;  - the prologue keeps the old sp in %s9, reserves 320 bytes, masks sp
;;;    with (59)1 (clears the low 5 bits, i.e. 32-byte alignment) and copies
;;;    the aligned sp into %s17;
;;;  - if the new sp is not >= %s8, the fall-through block %bb.1 runs before
;;;    execution continues at .LBB13_2;
;;;  - %s0 is shifted left by 6 (x64, the byte size of <512 x i1>) before
;;;    the call to __ve_grow_stack;
;;;  - the dynamic area is read as eight 8-byte words starting at
;;;    240(, %s11); those values are only loaded into %s1/%s0;
;;;  - the fixed slot is read through %s17 at offsets 256..312 and moved
;;;    into %vm3/%vm2 with lvm;
;;;  - the epilogue restores sp from %s9 and reloads %s17, %s10 and %s9.
924 define fastcc <512 x i1> @load__vm512_stk_dyn_align(i64 noundef %0) {
925 ; CHECK-LABEL: load__vm512_stk_dyn_align:
927 ; CHECK-NEXT: st %s9, (, %s11)
928 ; CHECK-NEXT: st %s10, 8(, %s11)
929 ; CHECK-NEXT: st %s17, 40(, %s11)
930 ; CHECK-NEXT: or %s9, 0, %s11
931 ; CHECK-NEXT: lea %s11, -320(, %s11)
932 ; CHECK-NEXT: and %s11, %s11, (59)1
933 ; CHECK-NEXT: or %s17, 0, %s11
934 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB13_2
935 ; CHECK-NEXT: # %bb.1:
936 ; CHECK-NEXT: ld %s61, 24(, %s14)
937 ; CHECK-NEXT: or %s62, 0, %s0
938 ; CHECK-NEXT: lea %s63, 315
939 ; CHECK-NEXT: shm.l %s63, (%s61)
940 ; CHECK-NEXT: shm.l %s8, 8(%s61)
941 ; CHECK-NEXT: shm.l %s11, 16(%s61)
943 ; CHECK-NEXT: or %s0, 0, %s62
944 ; CHECK-NEXT: .LBB13_2:
945 ; CHECK-NEXT: sll %s0, %s0, 6
946 ; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
947 ; CHECK-NEXT: and %s1, %s1, (32)0
948 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
949 ; CHECK-NEXT: bsic %s10, (, %s12)
950 ; CHECK-NEXT: lea %s0, 240(, %s11)
951 ; CHECK-NEXT: ld %s1, 56(, %s0)
952 ; CHECK-NEXT: ld %s1, 48(, %s0)
953 ; CHECK-NEXT: ld %s1, 40(, %s0)
954 ; CHECK-NEXT: ld %s1, 32(, %s0)
955 ; CHECK-NEXT: ld %s1, 24(, %s0)
956 ; CHECK-NEXT: ld %s1, 16(, %s0)
957 ; CHECK-NEXT: ld %s1, 8(, %s0)
958 ; CHECK-NEXT: ld %s0, (, %s0)
959 ; CHECK-NEXT: # implicit-def: $vmp1
960 ; CHECK-NEXT: ld %s16, 256(, %s17)
961 ; CHECK-NEXT: lvm %vm3, 0, %s16
962 ; CHECK-NEXT: ld %s16, 264(, %s17)
963 ; CHECK-NEXT: lvm %vm3, 1, %s16
964 ; CHECK-NEXT: ld %s16, 272(, %s17)
965 ; CHECK-NEXT: lvm %vm3, 2, %s16
966 ; CHECK-NEXT: ld %s16, 280(, %s17)
967 ; CHECK-NEXT: lvm %vm3, 3, %s16
968 ; CHECK-NEXT: ld %s16, 288(, %s17)
969 ; CHECK-NEXT: lvm %vm2, 0, %s16
970 ; CHECK-NEXT: ld %s16, 296(, %s17)
971 ; CHECK-NEXT: lvm %vm2, 1, %s16
972 ; CHECK-NEXT: ld %s16, 304(, %s17)
973 ; CHECK-NEXT: lvm %vm2, 2, %s16
974 ; CHECK-NEXT: ld %s16, 312(, %s17)
975 ; CHECK-NEXT: lvm %vm2, 3, %s16
976 ; CHECK-NEXT: or %s11, 0, %s9
977 ; CHECK-NEXT: ld %s17, 40(, %s11)
978 ; CHECK-NEXT: ld %s10, 8(, %s11)
979 ; CHECK-NEXT: ld %s9, (, %s11)
980 ; CHECK-NEXT: b.l.t (, %s10)
;;; %2 is the fixed slot (align 32); %3 is the variable-sized area holding
;;; %0 elements of <512 x i1>.
;;; NOTE(review): %3 is allocated with align 8 but loaded with align 64 --
;;; confirm this mismatch is intended.
981 %2 = alloca <512 x i1>, align 32
982 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
983 %3 = alloca <512 x i1>, i64 %0, align 8
984 %4 = load volatile <512 x i1>, ptr %3, align 64
985 %5 = load volatile <512 x i1>, ptr %2, align 32
986 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
990 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
;;; load__vm512_stk_dyn_align2: volatile <512 x i1> loads from a dynamically
;;; sized alloca (%0 elements) and from two fixed allocas, one declared with
;;; align 32 and one with align 64.
;;;
;;; What the expected assembly below shows:
;;;  - the prologue keeps the old sp in %s9, reserves 384 bytes (64 more
;;;    than the single-slot variants), masks sp with (58)1 (clears the low
;;;    6 bits, i.e. 64-byte alignment) and copies the aligned sp into %s17;
;;;  - if the new sp is not >= %s8, the fall-through block %bb.1 runs before
;;;    execution continues at .LBB14_2;
;;;  - %s0 is shifted left by 6 (x64, the byte size of <512 x i1>) before
;;;    the call to __ve_grow_stack;
;;;  - the dynamic area is read as eight 8-byte words starting at
;;;    240(, %s11); those values are only loaded into %s1/%s0;
;;;  - two fixed-slot reads follow, both through %s17: offsets 320..376 go
;;;    into %vm3/%vm2 and offsets 256..312 go into %vm5/%vm4 (presumably the
;;;    align-32 slot first and the align-64 slot second, matching the order
;;;    of the IR loads -- TODO confirm);
;;;  - the epilogue restores sp from %s9 and reloads %s17, %s10 and %s9.
991 define fastcc <512 x i1> @load__vm512_stk_dyn_align2(i64 noundef %0) {
992 ; CHECK-LABEL: load__vm512_stk_dyn_align2:
994 ; CHECK-NEXT: st %s9, (, %s11)
995 ; CHECK-NEXT: st %s10, 8(, %s11)
996 ; CHECK-NEXT: st %s17, 40(, %s11)
997 ; CHECK-NEXT: or %s9, 0, %s11
998 ; CHECK-NEXT: lea %s11, -384(, %s11)
999 ; CHECK-NEXT: and %s11, %s11, (58)1
1000 ; CHECK-NEXT: or %s17, 0, %s11
1001 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB14_2
1002 ; CHECK-NEXT: # %bb.1:
1003 ; CHECK-NEXT: ld %s61, 24(, %s14)
1004 ; CHECK-NEXT: or %s62, 0, %s0
1005 ; CHECK-NEXT: lea %s63, 315
1006 ; CHECK-NEXT: shm.l %s63, (%s61)
1007 ; CHECK-NEXT: shm.l %s8, 8(%s61)
1008 ; CHECK-NEXT: shm.l %s11, 16(%s61)
1010 ; CHECK-NEXT: or %s0, 0, %s62
1011 ; CHECK-NEXT: .LBB14_2:
1012 ; CHECK-NEXT: sll %s0, %s0, 6
1013 ; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
1014 ; CHECK-NEXT: and %s1, %s1, (32)0
1015 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
1016 ; CHECK-NEXT: bsic %s10, (, %s12)
1017 ; CHECK-NEXT: lea %s0, 240(, %s11)
1018 ; CHECK-NEXT: ld %s1, 56(, %s0)
1019 ; CHECK-NEXT: ld %s1, 48(, %s0)
1020 ; CHECK-NEXT: ld %s1, 40(, %s0)
1021 ; CHECK-NEXT: ld %s1, 32(, %s0)
1022 ; CHECK-NEXT: ld %s1, 24(, %s0)
1023 ; CHECK-NEXT: ld %s1, 16(, %s0)
1024 ; CHECK-NEXT: ld %s1, 8(, %s0)
1025 ; CHECK-NEXT: ld %s0, (, %s0)
1026 ; CHECK-NEXT: # implicit-def: $vmp1
1027 ; CHECK-NEXT: ld %s16, 320(, %s17)
1028 ; CHECK-NEXT: lvm %vm3, 0, %s16
1029 ; CHECK-NEXT: ld %s16, 328(, %s17)
1030 ; CHECK-NEXT: lvm %vm3, 1, %s16
1031 ; CHECK-NEXT: ld %s16, 336(, %s17)
1032 ; CHECK-NEXT: lvm %vm3, 2, %s16
1033 ; CHECK-NEXT: ld %s16, 344(, %s17)
1034 ; CHECK-NEXT: lvm %vm3, 3, %s16
1035 ; CHECK-NEXT: ld %s16, 352(, %s17)
1036 ; CHECK-NEXT: lvm %vm2, 0, %s16
1037 ; CHECK-NEXT: ld %s16, 360(, %s17)
1038 ; CHECK-NEXT: lvm %vm2, 1, %s16
1039 ; CHECK-NEXT: ld %s16, 368(, %s17)
1040 ; CHECK-NEXT: lvm %vm2, 2, %s16
1041 ; CHECK-NEXT: ld %s16, 376(, %s17)
1042 ; CHECK-NEXT: lvm %vm2, 3, %s16
1043 ; CHECK-NEXT: # implicit-def: $vmp2
1044 ; CHECK-NEXT: ld %s16, 256(, %s17)
1045 ; CHECK-NEXT: lvm %vm5, 0, %s16
1046 ; CHECK-NEXT: ld %s16, 264(, %s17)
1047 ; CHECK-NEXT: lvm %vm5, 1, %s16
1048 ; CHECK-NEXT: ld %s16, 272(, %s17)
1049 ; CHECK-NEXT: lvm %vm5, 2, %s16
1050 ; CHECK-NEXT: ld %s16, 280(, %s17)
1051 ; CHECK-NEXT: lvm %vm5, 3, %s16
1052 ; CHECK-NEXT: ld %s16, 288(, %s17)
1053 ; CHECK-NEXT: lvm %vm4, 0, %s16
1054 ; CHECK-NEXT: ld %s16, 296(, %s17)
1055 ; CHECK-NEXT: lvm %vm4, 1, %s16
1056 ; CHECK-NEXT: ld %s16, 304(, %s17)
1057 ; CHECK-NEXT: lvm %vm4, 2, %s16
1058 ; CHECK-NEXT: ld %s16, 312(, %s17)
1059 ; CHECK-NEXT: lvm %vm4, 3, %s16
1060 ; CHECK-NEXT: or %s11, 0, %s9
1061 ; CHECK-NEXT: ld %s17, 40(, %s11)
1062 ; CHECK-NEXT: ld %s10, 8(, %s11)
1063 ; CHECK-NEXT: ld %s9, (, %s11)
1064 ; CHECK-NEXT: b.l.t (, %s10)
;;; %2 (align 32) and %3 (align 64) are the fixed slots; %4 is the
;;; variable-sized area holding %0 elements of <512 x i1>.
;;; NOTE(review): %4 is allocated with align 8 but loaded with align 64 --
;;; confirm this mismatch is intended.
1065 %2 = alloca <512 x i1>, align 32
1066 %3 = alloca <512 x i1>, align 64
1067 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
1068 %4 = alloca <512 x i1>, i64 %0, align 8
1069 %5 = load volatile <512 x i1>, ptr %4, align 64
1070 %6 = load volatile <512 x i1>, ptr %2, align 32
1071 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
1072 %7 = load volatile <512 x i1>, ptr %3, align 64
1073 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
1074 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
1078 ; Function Attrs: nounwind
;;; load__vm512_stk_dyn_align_spill: same shape as the dynamic + align-32
;;; case, but with calls to @dummy and @pass(%0) after the loads, so values
;;; that are live across the calls have to be preserved.
;;;
;;; What the expected assembly below shows:
;;;  - the prologue keeps the old sp in %s9, reserves 384 bytes, masks sp
;;;    with (59)1 (clears the low 5 bits, i.e. 32-byte alignment) and copies
;;;    the aligned sp into %s17;
;;;  - if the new sp is not >= %s8, the fall-through block %bb.1 runs before
;;;    execution continues at .LBB15_2;
;;;  - %s18 is saved at 48(, %s9) and then holds the incoming %s0, which is
;;;    copied back into %s0 just before the call to pass;
;;;  - %s0 is shifted left by 6 (x64, the byte size of <512 x i1>) before
;;;    the call to __ve_grow_stack;
;;;  - the dynamic area is read as eight 8-byte words starting at
;;;    240(, %s11);
;;;  - the fixed slot is read through %s17 at offsets 320..376 into
;;;    %vm3/%vm2, which are then written out with svm/st to offsets 256..312
;;;    (the 64-byte spill) before the calls and read back with ld/lvm
;;;    afterwards (the 64-byte reload);
;;;  - the epilogue reloads %s18, restores sp from %s9 and reloads %s17,
;;;    %s10 and %s9.
1079 define fastcc <512 x i1> @load__vm512_stk_dyn_align_spill(i64 noundef %0) {
1080 ; CHECK-LABEL: load__vm512_stk_dyn_align_spill:
1082 ; CHECK-NEXT: st %s9, (, %s11)
1083 ; CHECK-NEXT: st %s10, 8(, %s11)
1084 ; CHECK-NEXT: st %s17, 40(, %s11)
1085 ; CHECK-NEXT: or %s9, 0, %s11
1086 ; CHECK-NEXT: lea %s11, -384(, %s11)
1087 ; CHECK-NEXT: and %s11, %s11, (59)1
1088 ; CHECK-NEXT: or %s17, 0, %s11
1089 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB15_2
1090 ; CHECK-NEXT: # %bb.1:
1091 ; CHECK-NEXT: ld %s61, 24(, %s14)
1092 ; CHECK-NEXT: or %s62, 0, %s0
1093 ; CHECK-NEXT: lea %s63, 315
1094 ; CHECK-NEXT: shm.l %s63, (%s61)
1095 ; CHECK-NEXT: shm.l %s8, 8(%s61)
1096 ; CHECK-NEXT: shm.l %s11, 16(%s61)
1098 ; CHECK-NEXT: or %s0, 0, %s62
1099 ; CHECK-NEXT: .LBB15_2:
1100 ; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill
1101 ; CHECK-NEXT: or %s18, 0, %s0
1102 ; CHECK-NEXT: sll %s0, %s0, 6
1103 ; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
1104 ; CHECK-NEXT: and %s1, %s1, (32)0
1105 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
1106 ; CHECK-NEXT: bsic %s10, (, %s12)
1107 ; CHECK-NEXT: lea %s0, 240(, %s11)
1108 ; CHECK-NEXT: ld %s1, 56(, %s0)
1109 ; CHECK-NEXT: ld %s1, 48(, %s0)
1110 ; CHECK-NEXT: ld %s1, 40(, %s0)
1111 ; CHECK-NEXT: ld %s1, 32(, %s0)
1112 ; CHECK-NEXT: ld %s1, 24(, %s0)
1113 ; CHECK-NEXT: ld %s1, 16(, %s0)
1114 ; CHECK-NEXT: ld %s1, 8(, %s0)
1115 ; CHECK-NEXT: ld %s0, (, %s0)
1116 ; CHECK-NEXT: # implicit-def: $vmp1
1117 ; CHECK-NEXT: ld %s16, 320(, %s17)
1118 ; CHECK-NEXT: lvm %vm3, 0, %s16
1119 ; CHECK-NEXT: ld %s16, 328(, %s17)
1120 ; CHECK-NEXT: lvm %vm3, 1, %s16
1121 ; CHECK-NEXT: ld %s16, 336(, %s17)
1122 ; CHECK-NEXT: lvm %vm3, 2, %s16
1123 ; CHECK-NEXT: ld %s16, 344(, %s17)
1124 ; CHECK-NEXT: lvm %vm3, 3, %s16
1125 ; CHECK-NEXT: ld %s16, 352(, %s17)
1126 ; CHECK-NEXT: lvm %vm2, 0, %s16
1127 ; CHECK-NEXT: ld %s16, 360(, %s17)
1128 ; CHECK-NEXT: lvm %vm2, 1, %s16
1129 ; CHECK-NEXT: ld %s16, 368(, %s17)
1130 ; CHECK-NEXT: lvm %vm2, 2, %s16
1131 ; CHECK-NEXT: ld %s16, 376(, %s17)
1132 ; CHECK-NEXT: lvm %vm2, 3, %s16
1133 ; CHECK-NEXT: svm %s16, %vm3, 0
1134 ; CHECK-NEXT: st %s16, 256(, %s17)
1135 ; CHECK-NEXT: svm %s16, %vm3, 1
1136 ; CHECK-NEXT: st %s16, 264(, %s17)
1137 ; CHECK-NEXT: svm %s16, %vm3, 2
1138 ; CHECK-NEXT: st %s16, 272(, %s17)
1139 ; CHECK-NEXT: svm %s16, %vm3, 3
1140 ; CHECK-NEXT: st %s16, 280(, %s17)
1141 ; CHECK-NEXT: svm %s16, %vm2, 0
1142 ; CHECK-NEXT: st %s16, 288(, %s17)
1143 ; CHECK-NEXT: svm %s16, %vm2, 1
1144 ; CHECK-NEXT: st %s16, 296(, %s17)
1145 ; CHECK-NEXT: svm %s16, %vm2, 2
1146 ; CHECK-NEXT: st %s16, 304(, %s17)
1147 ; CHECK-NEXT: svm %s16, %vm2, 3
1148 ; CHECK-NEXT: st %s16, 312(, %s17) # 64-byte Folded Spill
1149 ; CHECK-NEXT: lea %s0, dummy@lo
1150 ; CHECK-NEXT: and %s0, %s0, (32)0
1151 ; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0)
1152 ; CHECK-NEXT: bsic %s10, (, %s12)
1153 ; CHECK-NEXT: lea %s0, pass@lo
1154 ; CHECK-NEXT: and %s0, %s0, (32)0
1155 ; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0)
1156 ; CHECK-NEXT: or %s0, 0, %s18
1157 ; CHECK-NEXT: bsic %s10, (, %s12)
1158 ; CHECK-NEXT: # implicit-def: $vmp1
1159 ; CHECK-NEXT: ld %s16, 256(, %s17)
1160 ; CHECK-NEXT: lvm %vm3, 0, %s16
1161 ; CHECK-NEXT: ld %s16, 264(, %s17)
1162 ; CHECK-NEXT: lvm %vm3, 1, %s16
1163 ; CHECK-NEXT: ld %s16, 272(, %s17)
1164 ; CHECK-NEXT: lvm %vm3, 2, %s16
1165 ; CHECK-NEXT: ld %s16, 280(, %s17)
1166 ; CHECK-NEXT: lvm %vm3, 3, %s16
1167 ; CHECK-NEXT: ld %s16, 288(, %s17)
1168 ; CHECK-NEXT: lvm %vm2, 0, %s16
1169 ; CHECK-NEXT: ld %s16, 296(, %s17)
1170 ; CHECK-NEXT: lvm %vm2, 1, %s16
1171 ; CHECK-NEXT: ld %s16, 304(, %s17)
1172 ; CHECK-NEXT: lvm %vm2, 2, %s16
1173 ; CHECK-NEXT: ld %s16, 312(, %s17) # 64-byte Folded Reload
1174 ; CHECK-NEXT: lvm %vm2, 3, %s16
1175 ; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload
1176 ; CHECK-NEXT: or %s11, 0, %s9
1177 ; CHECK-NEXT: ld %s17, 40(, %s11)
1178 ; CHECK-NEXT: ld %s10, 8(, %s11)
1179 ; CHECK-NEXT: ld %s9, (, %s11)
1180 ; CHECK-NEXT: b.l.t (, %s10)
;;; %2 is the fixed slot (align 32); %3 is the variable-sized area holding
;;; %0 elements of <512 x i1>. The two calls come after both loads.
;;; NOTE(review): %3 is allocated with align 8 but loaded with align 64 --
;;; confirm this mismatch is intended.
1181 %2 = alloca <512 x i1>, align 32
1182 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
1183 %3 = alloca <512 x i1>, i64 %0, align 8
1184 %4 = load volatile <512 x i1>, ptr %3, align 64
1185 %5 = load volatile <512 x i1>, ptr %2, align 32
1186 tail call fastcc void @dummy()
1187 tail call fastcc void @pass(i64 noundef %0)
1188 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
;;; Version string of the compiler recorded with this IR (presumably the
;;; llvm.ident entry -- the node that references !2 is not shown here).
1192 !2 = !{!"clang version 15.0.0 (git@kaz7.github.com:sx-aurora-dev/llvm-project.git 50263c9e9cc3714bcd816eaea8822d3e010a0f19)"}
;;; TBAA chain: !3 is the access tag attached via !tbaa to the volatile i64
;;; loads; it names type "long" (!4), whose parent is "omnipotent char" (!5)
;;; under the root "Simple C/C++ TBAA" (!6).
1193 !3 = !{!4, !4, i64 0}
1194 !4 = !{!"long", !5, i64 0}
1195 !5 = !{!"omnipotent char", !6, i64 0}
1196 !6 = !{!"Simple C/C++ TBAA"}
;;; Loop IDs: each distinct node below is a separate !llvm.loop identifier,
;;; and every one of them carries the shared llvm.loop.mustprogress
;;; property (!8).
1197 !7 = distinct !{!7, !8}
1198 !8 = !{!"llvm.loop.mustprogress"}
1199 !9 = distinct !{!9, !8}
1200 !10 = distinct !{!10, !8}
1201 !11 = distinct !{!11, !8}
1202 !12 = distinct !{!12, !8}
1203 !13 = distinct !{!13, !8}