1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=0 | FileCheck %s
; External function (declaration only); a test passes a stack slot to it.
4 declare void @use(ptr %p)
; AArch64 MTE tag-setting intrinsics taking a pointer and an i64 length. The
; tests below pass the byte size of the alloca being tagged as the length.
5 declare void @llvm.aarch64.settag(ptr %p, i64 %a)
6 declare void @llvm.aarch64.settag.zero(ptr %p, i64 %a)
; Two 16-byte, 16-aligned stack slots, each tagged by its own settag call.
; Expected output: a 32-byte frame and a single st2g on sp whose post-index
; (#32) equals the amount subtracted from sp in the prologue.
8 define void @stg16_16() {
9 ; CHECK-LABEL: stg16_16:
10 ; CHECK: // %bb.0: // %entry
11 ; CHECK-NEXT: sub sp, sp, #32
12 ; CHECK-NEXT: .cfi_def_cfa_offset 32
13 ; CHECK-NEXT: st2g sp, [sp], #32
16 %a = alloca i8, i32 16, align 16
17 %b = alloca i8, i32 16, align 16
18 call void @llvm.aarch64.settag(ptr %a, i64 16)
19 call void @llvm.aarch64.settag(ptr %b, i64 16)
; Four 16-byte slots (64 bytes total) tagged in a function that returns i32.
; Expected output: w0 is set from wzr before the tag stores, then two st2g
; instructions cover the frame; the second is post-indexed by the full frame
; size (#64).
23 define i32 @stg16_16_16_16_ret() {
24 ; CHECK-LABEL: stg16_16_16_16_ret:
25 ; CHECK: // %bb.0: // %entry
26 ; CHECK-NEXT: sub sp, sp, #64
27 ; CHECK-NEXT: .cfi_def_cfa_offset 64
28 ; CHECK-NEXT: mov w0, wzr
29 ; CHECK-NEXT: st2g sp, [sp, #32]
30 ; CHECK-NEXT: st2g sp, [sp], #64
33 %a = alloca i8, i32 16, align 16
34 %b = alloca i8, i32 16, align 16
35 %c = alloca i8, i32 16, align 16
36 %d = alloca i8, i32 16, align 16
37 call void @llvm.aarch64.settag(ptr %a, i64 16)
38 call void @llvm.aarch64.settag(ptr %b, i64 16)
39 call void @llvm.aarch64.settag(ptr %c, i64 16)
40 call void @llvm.aarch64.settag(ptr %d, i64 16)
; Void variant of the four-slot case: four 16-byte slots, four settag calls.
; Expected output: two st2g instructions, the upper half at [sp, #32] first,
; then the lower half with a post-index equal to the 64-byte frame size.
44 define void @stg16_16_16_16() {
45 ; CHECK-LABEL: stg16_16_16_16:
46 ; CHECK: // %bb.0: // %entry
47 ; CHECK-NEXT: sub sp, sp, #64
48 ; CHECK-NEXT: .cfi_def_cfa_offset 64
49 ; CHECK-NEXT: st2g sp, [sp, #32]
50 ; CHECK-NEXT: st2g sp, [sp], #64
53 %a = alloca i8, i32 16, align 16
54 %b = alloca i8, i32 16, align 16
55 %c = alloca i8, i32 16, align 16
56 %d = alloca i8, i32 16, align 16
57 call void @llvm.aarch64.settag(ptr %a, i64 16)
58 call void @llvm.aarch64.settag(ptr %b, i64 16)
59 call void @llvm.aarch64.settag(ptr %c, i64 16)
60 call void @llvm.aarch64.settag(ptr %d, i64 16)
; Four 128-byte slots (512 bytes total), each tagged by its own settag call.
; Expected output: one loop covers all four regions. x8 counts down from 512
; in steps of 32 while st2g post-increments sp by 32 per iteration, so sp has
; advanced by the 512 bytes of locals when the loop exits. x29 is spilled in
; the prologue and reloaded with a #16 post-index after the loop.
64 define void @stg128_128_128_128() {
65 ; CHECK-LABEL: stg128_128_128_128:
66 ; CHECK: // %bb.0: // %entry
67 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
68 ; CHECK-NEXT: sub sp, sp, #512
69 ; CHECK-NEXT: .cfi_def_cfa_offset 528
70 ; CHECK-NEXT: .cfi_offset w29, -16
71 ; CHECK-NEXT: mov x8, #512 // =0x200
72 ; CHECK-NEXT: .LBB3_1: // %entry
73 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
74 ; CHECK-NEXT: st2g sp, [sp], #32
75 ; CHECK-NEXT: subs x8, x8, #32
76 ; CHECK-NEXT: b.ne .LBB3_1
77 ; CHECK-NEXT: // %bb.2: // %entry
78 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
81 %a = alloca i8, i32 128, align 16
82 %b = alloca i8, i32 128, align 16
83 %c = alloca i8, i32 128, align 16
84 %d = alloca i8, i32 128, align 16
85 call void @llvm.aarch64.settag(ptr %a, i64 128)
86 call void @llvm.aarch64.settag(ptr %b, i64 128)
87 call void @llvm.aarch64.settag(ptr %c, i64 128)
88 call void @llvm.aarch64.settag(ptr %d, i64 128)
; Mixed sizes: a 512-byte slot between two 16-byte slots (16 + 512 + 16 = 544).
; Expected output: the three settag calls become one loop whose counter starts
; at 544 and whose st2g post-increments sp by 32 per iteration.
92 define void @stg16_512_16() {
93 ; CHECK-LABEL: stg16_512_16:
94 ; CHECK: // %bb.0: // %entry
95 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
96 ; CHECK-NEXT: sub sp, sp, #544
97 ; CHECK-NEXT: .cfi_def_cfa_offset 560
98 ; CHECK-NEXT: .cfi_offset w29, -16
99 ; CHECK-NEXT: mov x8, #544 // =0x220
100 ; CHECK-NEXT: .LBB4_1: // %entry
101 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
102 ; CHECK-NEXT: st2g sp, [sp], #32
103 ; CHECK-NEXT: subs x8, x8, #32
104 ; CHECK-NEXT: b.ne .LBB4_1
105 ; CHECK-NEXT: // %bb.2: // %entry
106 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
109 %a = alloca i8, i32 16, align 16
110 %b = alloca i8, i32 512, align 16
111 %c = alloca i8, i32 16, align 16
112 call void @llvm.aarch64.settag(ptr %a, i64 16)
113 call void @llvm.aarch64.settag(ptr %b, i64 512)
114 call void @llvm.aarch64.settag(ptr %c, i64 16)
; Three 512-byte slots (1536 bytes total), each tagged by its own settag call.
; Expected output: a single loop with the counter starting at 1536 and st2g
; post-incrementing sp by 32 per iteration, followed by the x29 reload.
118 define void @stg512_512_512() {
119 ; CHECK-LABEL: stg512_512_512:
120 ; CHECK: // %bb.0: // %entry
121 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
122 ; CHECK-NEXT: sub sp, sp, #1536
123 ; CHECK-NEXT: .cfi_def_cfa_offset 1552
124 ; CHECK-NEXT: .cfi_offset w29, -16
125 ; CHECK-NEXT: mov x8, #1536 // =0x600
126 ; CHECK-NEXT: .LBB5_1: // %entry
127 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
128 ; CHECK-NEXT: st2g sp, [sp], #32
129 ; CHECK-NEXT: subs x8, x8, #32
130 ; CHECK-NEXT: b.ne .LBB5_1
131 ; CHECK-NEXT: // %bb.2: // %entry
132 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
135 %a = alloca i8, i32 512, align 16
136 %b = alloca i8, i32 512, align 16
137 %c = alloca i8, i32 512, align 16
138 call void @llvm.aarch64.settag(ptr %a, i64 512)
139 call void @llvm.aarch64.settag(ptr %b, i64 512)
140 call void @llvm.aarch64.settag(ptr %c, i64 512)
; Tagging split across a conditional: three 48-byte slots in a 144-byte frame.
; Expected output: the %if.then block tags 96 bytes with three st2g at fixed
; offsets (#48, #80, #112) and leaves sp unchanged. The %if.end block tags the
; remaining 48 bytes with stg at #32 plus st2g at sp, and only that final st2g
; carries the post-index (#144) matching the prologue's sub.
144 define void @early(i1 %flag) {
145 ; CHECK-LABEL: early:
146 ; CHECK: // %bb.0: // %entry
147 ; CHECK-NEXT: sub sp, sp, #144
148 ; CHECK-NEXT: .cfi_def_cfa_offset 144
149 ; CHECK-NEXT: tbz w0, #0, .LBB6_2
150 ; CHECK-NEXT: // %bb.1: // %if.then
151 ; CHECK-NEXT: st2g sp, [sp, #48]
152 ; CHECK-NEXT: st2g sp, [sp, #80]
153 ; CHECK-NEXT: st2g sp, [sp, #112]
154 ; CHECK-NEXT: .LBB6_2: // %if.end
155 ; CHECK-NEXT: stg sp, [sp, #32]
156 ; CHECK-NEXT: st2g sp, [sp], #144
159 %a = alloca i8, i32 48, align 16
160 %b = alloca i8, i32 48, align 16
161 %c = alloca i8, i32 48, align 16
162 br i1 %flag, label %if.then, label %if.end
165 call void @llvm.aarch64.settag(ptr %a, i64 48)
166 call void @llvm.aarch64.settag(ptr %b, i64 48)
170 call void @llvm.aarch64.settag(ptr %c, i64 48)
; Conditional tagging with loop-sized regions: two 128-byte slots and one
; 48-byte slot in a 320-byte frame (x29 is spilled at sp+304).
; Expected output: the %if.then block tags 256 bytes with a loop that walks
; scratch register x9 upward from sp+48, so sp itself is not modified there.
; The %if.end block tags the remaining 48 bytes with stg + st2g; the st2g is
; post-indexed by #304 and the x29 reload by #16 (304 + 16 = 320).
174 define void @early_128_128(i1 %flag) {
175 ; CHECK-LABEL: early_128_128:
176 ; CHECK: // %bb.0: // %entry
177 ; CHECK-NEXT: sub sp, sp, #320
178 ; CHECK-NEXT: str x29, [sp, #304] // 8-byte Folded Spill
179 ; CHECK-NEXT: .cfi_def_cfa_offset 320
180 ; CHECK-NEXT: .cfi_offset w29, -16
181 ; CHECK-NEXT: tbz w0, #0, .LBB7_4
182 ; CHECK-NEXT: // %bb.1: // %if.then
183 ; CHECK-NEXT: add x9, sp, #48
184 ; CHECK-NEXT: mov x8, #256 // =0x100
185 ; CHECK-NEXT: .LBB7_2: // %if.then
186 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
187 ; CHECK-NEXT: st2g x9, [x9], #32
188 ; CHECK-NEXT: subs x8, x8, #32
189 ; CHECK-NEXT: b.ne .LBB7_2
190 ; CHECK-NEXT: // %bb.3: // %if.then
191 ; CHECK-NEXT: .LBB7_4: // %if.end
192 ; CHECK-NEXT: stg sp, [sp, #32]
193 ; CHECK-NEXT: st2g sp, [sp], #304
194 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
197 %a = alloca i8, i32 128, align 16
198 %b = alloca i8, i32 128, align 16
199 %c = alloca i8, i32 48, align 16
200 br i1 %flag, label %if.then, label %if.end
203 call void @llvm.aarch64.settag(ptr %a, i64 128)
204 call void @llvm.aarch64.settag(ptr %b, i64 128)
208 call void @llvm.aarch64.settag(ptr %c, i64 48)
; Larger variant of the conditional case: two 512-byte slots and one 48-byte
; slot (1072 bytes of locals below a 16-byte x29 spill).
; Expected output: the %if.then block tags 1024 bytes with a loop on scratch
; register x9 starting at sp+48. The %if.end block tags the remaining 48 bytes
; with stg + st2g; the st2g is post-indexed by #1072, followed by the x29
; reload post-indexed by #16.
212 define void @early_512_512(i1 %flag) {
213 ; CHECK-LABEL: early_512_512:
214 ; CHECK: // %bb.0: // %entry
215 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
216 ; CHECK-NEXT: sub sp, sp, #1072
217 ; CHECK-NEXT: .cfi_def_cfa_offset 1088
218 ; CHECK-NEXT: .cfi_offset w29, -16
219 ; CHECK-NEXT: tbz w0, #0, .LBB8_4
220 ; CHECK-NEXT: // %bb.1: // %if.then
221 ; CHECK-NEXT: add x9, sp, #48
222 ; CHECK-NEXT: mov x8, #1024 // =0x400
223 ; CHECK-NEXT: .LBB8_2: // %if.then
224 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
225 ; CHECK-NEXT: st2g x9, [x9], #32
226 ; CHECK-NEXT: subs x8, x8, #32
227 ; CHECK-NEXT: b.ne .LBB8_2
228 ; CHECK-NEXT: // %bb.3: // %if.then
229 ; CHECK-NEXT: .LBB8_4: // %if.end
230 ; CHECK-NEXT: stg sp, [sp, #32]
231 ; CHECK-NEXT: st2g sp, [sp], #1072
232 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
235 %a = alloca i8, i32 512, align 16
236 %b = alloca i8, i32 512, align 16
237 %c = alloca i8, i32 48, align 16
238 br i1 %flag, label %if.then, label %if.end
241 call void @llvm.aarch64.settag(ptr %a, i64 512)
242 call void @llvm.aarch64.settag(ptr %b, i64 512)
246 call void @llvm.aarch64.settag(ptr %c, i64 48)
250 ; Two loops of size 256; the second loop updates SP.
; %b (32 bytes) is passed to @use and never tagged; the assertions place it at
; sp+256, between the two tagged 256-byte regions (256 + 32 + 256 = 544).
; Expected output: the first loop walks scratch register x9 upward from sp,
; then sp is advanced by #288 (256 tagged bytes + the 32-byte gap) and the
; second loop post-increments sp itself, leaving only the x29/x30 reload.
251 define void @stg128_128_gap_128_128() {
252 ; CHECK-LABEL: stg128_128_gap_128_128:
253 ; CHECK: // %bb.0: // %entry
254 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
255 ; CHECK-NEXT: sub sp, sp, #544
256 ; CHECK-NEXT: .cfi_def_cfa_offset 560
257 ; CHECK-NEXT: .cfi_offset w30, -8
258 ; CHECK-NEXT: .cfi_offset w29, -16
259 ; CHECK-NEXT: add x0, sp, #256
261 ; CHECK-NEXT: mov x9, sp
262 ; CHECK-NEXT: mov x8, #256 // =0x100
263 ; CHECK-NEXT: .LBB9_1: // %entry
264 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
265 ; CHECK-NEXT: st2g x9, [x9], #32
266 ; CHECK-NEXT: subs x8, x8, #32
267 ; CHECK-NEXT: b.ne .LBB9_1
268 ; CHECK-NEXT: // %bb.2: // %entry
269 ; CHECK-NEXT: add sp, sp, #288
270 ; CHECK-NEXT: mov x8, #256 // =0x100
271 ; CHECK-NEXT: .LBB9_3: // %entry
272 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
273 ; CHECK-NEXT: st2g sp, [sp], #32
274 ; CHECK-NEXT: subs x8, x8, #32
275 ; CHECK-NEXT: b.ne .LBB9_3
276 ; CHECK-NEXT: // %bb.4: // %entry
277 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
280 %a = alloca i8, i32 128, align 16
281 %a2 = alloca i8, i32 128, align 16
282 %b = alloca i8, i32 32, align 16
283 %c = alloca i8, i32 128, align 16
284 %c2 = alloca i8, i32 128, align 16
285 call void @use(ptr %b)
286 call void @llvm.aarch64.settag(ptr %a, i64 128)
287 call void @llvm.aarch64.settag(ptr %a2, i64 128)
288 call void @llvm.aarch64.settag(ptr %c, i64 128)
289 call void @llvm.aarch64.settag(ptr %c2, i64 128)
293 ; Function Attrs: nounwind
; Variadic printf declaration, called by the nzcv tests with @.str as the
; format argument.
294 declare i32 @printf(ptr, ...) #0
; Four-byte constant: "%d", a newline (\0A) and the NUL terminator.
296 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
299 ; Insert point of stg merge is followed by nzcv read
300 ; Don't merge in this case
; The icmp sits between the settag of %b and the settag of %c, and its result
; feeds the conditional branch. Expected output: the three tag stores stay
; separate (stg at #528, a loop on scratch register x9, stg at sp) and the
; cmp is placed after the loop, with b.ge consuming the flags after the last
; stg. NOTE(review): presumably a merged loop placed after the cmp would
; overwrite the flags through its subs before b.ge reads them; confirm
; against the merging pass.
302 define i32 @nzcv_clobber(i32 %in) {
304 ; CHECK-LABEL: nzcv_clobber:
305 ; CHECK: stg sp, [sp, #528]
306 ; CHECK-NEXT: .LBB10_1:
307 ; CHECK: st2g x9, [x9], #32
308 ; CHECK-NEXT: subs x8, x8, #32
309 ; CHECK-NEXT: b.ne .LBB10_1
310 ; CHECK-NEXT: // %bb.2:
311 ; CHECK-NEXT: cmp w0, #10
312 ; CHECK-NEXT: stg sp, [sp]
313 ; CHECK-NEXT: b.ge .LBB10_4
315 %a = alloca i8, i32 16, align 16
316 %b = alloca i8, i32 512, align 16
317 %c = alloca i8, i32 16, align 16
318 call void @llvm.aarch64.settag(ptr %a, i64 16)
319 call void @llvm.aarch64.settag(ptr %b, i64 512)
320 %cmp = icmp slt i32 %in, 10
321 call void @llvm.aarch64.settag(ptr %c, i64 16)
322 br i1 %cmp, label %return0, label %return1
324 return0: ; preds = %entry
325 %call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1
333 ; Insert point of stg merge is not followed by nzcv read
336 define i32 @nzcv_no_clobber(i32 %in) {
338 ; CHECK-LABEL: nzcv_no_clobber:
339 ; CHECK: mov x8, #544
340 ; CHECK-NEXT: .LBB11_1:
341 ; CHECK: st2g sp, [sp], #32
342 ; CHECK-NEXT: subs x8, x8, #32
343 ; CHECK-NEXT: b.ne .LBB11_1
346 %a = alloca i8, i32 16, align 16
347 %b = alloca i8, i32 512, align 16
348 %c = alloca i8, i32 16, align 16
349 call void @llvm.aarch64.settag(ptr %a, i64 16)
350 call void @llvm.aarch64.settag(ptr %b, i64 512)
351 call void @llvm.aarch64.settag(ptr %c, i64 16)
354 return0: ; preds = %entry
355 %call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1