1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 -verify-machineinstrs | FileCheck %s
4 ; Test stack clash protection probing for static allocas.
; Small: the whole 560-byte frame is allocated by a single aghi, which is
; followed by one probe (the cg at offset 552).
; %a is accessed at 160(%r15), so element 98 is at 160 + 98 * 4 = 552.
7 define i32 @fun0() #0 {
10 ; CHECK-NEXT: aghi %r15, -560
11 ; CHECK-NEXT: .cfi_def_cfa_offset 720
12 ; CHECK-NEXT: cg %r0, 552(%r15)
13 ; CHECK-NEXT: mvhi 552(%r15), 1
14 ; CHECK-NEXT: l %r2, 160(%r15)
15 ; CHECK-NEXT: aghi %r15, 560
18 %a = alloca i32, i64 100
19 %b = getelementptr inbounds i32, ptr %a, i64 98
20 store volatile i32 1, ptr %b
21 %c = load volatile i32, ptr %a
; Medium: the 8176-byte frame is allocated in two steps (4096 + 4080), and
; each step is followed by its own probe (cg).
; %a is accessed at 176(%r15), so element 200 is at 176 + 200 * 4 = 976.
26 define i32 @fun1() #0 {
29 ; CHECK-NEXT: aghi %r15, -4096
30 ; CHECK-NEXT: .cfi_def_cfa_offset 4256
31 ; CHECK-NEXT: cg %r0, 4088(%r15)
32 ; CHECK-NEXT: aghi %r15, -4080
33 ; CHECK-NEXT: .cfi_def_cfa_offset 8336
34 ; CHECK-NEXT: cg %r0, 4072(%r15)
35 ; CHECK-NEXT: mvhi 976(%r15), 1
36 ; CHECK-NEXT: l %r2, 176(%r15)
37 ; CHECK-NEXT: aghi %r15, 8176
40 %a = alloca i32, i64 2000
41 %b = getelementptr inbounds i32, ptr %a, i64 200
42 store volatile i32 1, ptr %b
43 %c = load volatile i32, ptr %a
47 ; Large: Use a loop to allocate and probe in steps.
; %r0 is set to the incoming %r15 minus 69632 (17 * 4096) and serves as the
; loop bound; each iteration allocates 4096 bytes and probes them. The
; remaining 2544 bytes (72176 - 69632) are allocated and probed after the loop.
48 define i32 @fun2() #0 {
51 ; CHECK-NEXT: lgr %r0, %r15
52 ; CHECK-NEXT: .cfi_def_cfa_register %r0
53 ; CHECK-NEXT: agfi %r0, -69632
54 ; CHECK-NEXT: .cfi_def_cfa_offset 69792
55 ; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
56 ; CHECK-NEXT: aghi %r15, -4096
57 ; CHECK-NEXT: cg %r0, 4088(%r15)
58 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB2_1
59 ; CHECK-NEXT: # %bb.2:
60 ; CHECK-NEXT: .cfi_def_cfa_register %r15
61 ; CHECK-NEXT: aghi %r15, -2544
62 ; CHECK-NEXT: .cfi_def_cfa_offset 72336
63 ; CHECK-NEXT: cg %r0, 2536(%r15)
64 ; CHECK-NEXT: lhi %r0, 1
65 ; CHECK-NEXT: mvhi 568(%r15), 1
66 ; CHECK-NEXT: sty %r0, 28968(%r15)
67 ; CHECK-NEXT: l %r2, 176(%r15)
68 ; CHECK-NEXT: agfi %r15, 72176
71 %a = alloca i32, i64 18000
72 %b0 = getelementptr inbounds i32, ptr %a, i64 98
73 %b1 = getelementptr inbounds i32, ptr %a, i64 7198
74 store volatile i32 1, ptr %b0
75 store volatile i32 1, ptr %b1
76 %c = load volatile i32, ptr %a
80 ; Ends evenly on the step so no remainder needed.
; The frame is 28672 bytes = 7 * 4096, so the loop allocates all of it and no
; further aghi/cg pair follows the loop.
81 define void @fun3() #0 {
83 ; CHECK: # %bb.0: # %entry
84 ; CHECK-NEXT: lgr %r0, %r15
85 ; CHECK-NEXT: .cfi_def_cfa_register %r0
86 ; CHECK-NEXT: aghi %r0, -28672
87 ; CHECK-NEXT: .cfi_def_cfa_offset 28832
88 ; CHECK-NEXT: .LBB3_1: # %entry
89 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
90 ; CHECK-NEXT: aghi %r15, -4096
91 ; CHECK-NEXT: cg %r0, 4088(%r15)
92 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB3_1
93 ; CHECK-NEXT: # %bb.2: # %entry
94 ; CHECK-NEXT: .cfi_def_cfa_register %r15
95 ; CHECK-NEXT: mvhi 180(%r15), 0
96 ; CHECK-NEXT: l %r0, 180(%r15)
97 ; CHECK-NEXT: aghi %r15, 28672
100 %stack = alloca [7122 x i32], align 4
101 %i = alloca i32, align 4
102 store volatile i32 0, ptr %i, align 4
103 %i.0.i.0.6 = load volatile i32, ptr %i, align 4
107 ; Loop with bigger step.
; With a "stack-probe-size" of 8192 the loop allocates and probes 8192 bytes
; per iteration, covering 24576 bytes (3 * 8192). The remaining 7608 bytes
; (32184 - 24576) are allocated and probed after the loop.
108 define void @fun4() #0 "stack-probe-size"="8192" {
110 ; CHECK: # %bb.0: # %entry
111 ; CHECK-NEXT: lgr %r0, %r15
112 ; CHECK-NEXT: .cfi_def_cfa_register %r0
113 ; CHECK-NEXT: aghi %r0, -24576
114 ; CHECK-NEXT: .cfi_def_cfa_offset 24736
115 ; CHECK-NEXT: .LBB4_1: # %entry
116 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
117 ; CHECK-NEXT: aghi %r15, -8192
118 ; CHECK-NEXT: cg %r0, 8184(%r15)
119 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB4_1
120 ; CHECK-NEXT: # %bb.2: # %entry
121 ; CHECK-NEXT: .cfi_def_cfa_register %r15
122 ; CHECK-NEXT: aghi %r15, -7608
123 ; CHECK-NEXT: .cfi_def_cfa_offset 32344
124 ; CHECK-NEXT: cg %r0, 7600(%r15)
125 ; CHECK-NEXT: mvhi 180(%r15), 0
126 ; CHECK-NEXT: l %r0, 180(%r15)
127 ; CHECK-NEXT: aghi %r15, 32184
128 ; CHECK-NEXT: br %r14
130 %stack = alloca [8000 x i32], align 4
131 %i = alloca i32, align 4
132 store volatile i32 0, ptr %i, align 4
133 %i.0.i.0.6 = load volatile i32, ptr %i, align 4
137 ; Probe size should be modulo stack alignment.
; The requested "stack-probe-size" of 4100 is not a multiple of the stack
; alignment; the expected code probes in a step of 4096 instead, so the
; 4184-byte frame is allocated as 4096 + 88 with a probe after each part.
138 define void @fun5() #0 "stack-probe-size"="4100" {
140 ; CHECK: # %bb.0: # %entry
141 ; CHECK-NEXT: aghi %r15, -4096
142 ; CHECK-NEXT: .cfi_def_cfa_offset 4256
143 ; CHECK-NEXT: cg %r0, 4088(%r15)
144 ; CHECK-NEXT: aghi %r15, -88
145 ; CHECK-NEXT: .cfi_def_cfa_offset 4344
146 ; CHECK-NEXT: cg %r0, 80(%r15)
147 ; CHECK-NEXT: mvhi 180(%r15), 0
148 ; CHECK-NEXT: l %r0, 180(%r15)
149 ; CHECK-NEXT: aghi %r15, 4184
150 ; CHECK-NEXT: br %r14
152 %stack = alloca [1000 x i32], align 4
153 %i = alloca i32, align 4
154 store volatile i32 0, ptr %i, align 4
155 %i.0.i.0.6 = load volatile i32, ptr %i, align 4
159 ; The minimum probe size is the stack alignment.
; A requested "stack-probe-size" of 5 results in a loop that allocates and
; probes 8 bytes per iteration until the whole 4184-byte frame is allocated
; (4184 = 523 * 8, so nothing is left to allocate after the loop).
160 define void @fun6() #0 "stack-probe-size"="5" {
162 ; CHECK: # %bb.0: # %entry
163 ; CHECK-NEXT: lgr %r0, %r15
164 ; CHECK-NEXT: .cfi_def_cfa_register %r0
165 ; CHECK-NEXT: aghi %r0, -4184
166 ; CHECK-NEXT: .cfi_def_cfa_offset 4344
167 ; CHECK-NEXT: .LBB6_1: # %entry
168 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
169 ; CHECK-NEXT: aghi %r15, -8
170 ; CHECK-NEXT: cg %r0, 0(%r15)
171 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB6_1
172 ; CHECK-NEXT: # %bb.2: # %entry
173 ; CHECK-NEXT: .cfi_def_cfa_register %r15
174 ; CHECK-NEXT: mvhi 180(%r15), 0
175 ; CHECK-NEXT: l %r0, 180(%r15)
176 ; CHECK-NEXT: aghi %r15, 4184
177 ; CHECK-NEXT: br %r14
179 %stack = alloca [1000 x i32], align 4
180 %i = alloca i32, align 4
181 store volatile i32 0, ptr %i, align 4
182 %i.0.i.0.6 = load volatile i32, ptr %i, align 4
186 ; Small with a natural probe (STMG) - needs no extra probe.
; The stmg of %r14/%r15 precedes the 3976-byte allocation, and the expected
; code contains no cg after the aghi.
187 define i32 @fun7() #0 {
190 ; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
191 ; CHECK-NEXT: .cfi_offset %r14, -48
192 ; CHECK-NEXT: .cfi_offset %r15, -40
193 ; CHECK-NEXT: aghi %r15, -3976
194 ; CHECK-NEXT: .cfi_def_cfa_offset 4136
195 ; CHECK-NEXT: brasl %r14, foo@PLT
196 ; CHECK-NEXT: st %r2, 568(%r15)
197 ; CHECK-NEXT: l %r2, 176(%r15)
198 ; CHECK-NEXT: lmg %r14, %r15, 4088(%r15)
199 ; CHECK-NEXT: br %r14
201 %a = alloca i32, i64 950
202 %b = getelementptr inbounds i32, ptr %a, i64 98
203 store volatile i32 %v, ptr %b
204 %c = load volatile i32, ptr %a
208 ; Medium with an STMG - still needs probing.
; The frame is 3984 bytes, 8 more than in fun7 (952 instead of 950 i32
; elements), and here the aghi is followed by an explicit cg probe.
209 define i32 @fun8() #0 {
212 ; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
213 ; CHECK-NEXT: .cfi_offset %r14, -48
214 ; CHECK-NEXT: .cfi_offset %r15, -40
215 ; CHECK-NEXT: aghi %r15, -3984
216 ; CHECK-NEXT: .cfi_def_cfa_offset 4144
217 ; CHECK-NEXT: cg %r0, 3976(%r15)
218 ; CHECK-NEXT: brasl %r14, foo@PLT
219 ; CHECK-NEXT: st %r2, 976(%r15)
220 ; CHECK-NEXT: l %r2, 176(%r15)
221 ; CHECK-NEXT: lmg %r14, %r15, 4096(%r15)
222 ; CHECK-NEXT: br %r14
225 %a = alloca i32, i64 952
226 %b = getelementptr inbounds i32, ptr %a, i64 200
227 store volatile i32 %v, ptr %b
228 %c = load volatile i32, ptr %a
; Large with the "backchain" attribute: same allocas and probing loop as fun3
; (28672 bytes = 7 * 4096). In addition, the incoming %r15 is copied to %r1
; before the loop and stored at 0(%r15) once the loop has finished.
232 define void @fun9() #0 "backchain" {
234 ; CHECK: # %bb.0: # %entry
235 ; CHECK-NEXT: lgr %r1, %r15
236 ; CHECK-NEXT: lgr %r0, %r15
237 ; CHECK-NEXT: .cfi_def_cfa_register %r0
238 ; CHECK-NEXT: aghi %r0, -28672
239 ; CHECK-NEXT: .cfi_def_cfa_offset 28832
240 ; CHECK-NEXT: .LBB9_1: # %entry
241 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
242 ; CHECK-NEXT: aghi %r15, -4096
243 ; CHECK-NEXT: cg %r0, 4088(%r15)
244 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB9_1
245 ; CHECK-NEXT: # %bb.2: # %entry
246 ; CHECK-NEXT: .cfi_def_cfa_register %r15
247 ; CHECK-NEXT: stg %r1, 0(%r15)
248 ; CHECK-NEXT: mvhi 180(%r15), 0
249 ; CHECK-NEXT: l %r0, 180(%r15)
250 ; CHECK-NEXT: aghi %r15, 28672
251 ; CHECK-NEXT: br %r14
253 %stack = alloca [7122 x i32], align 4
254 %i = alloca i32, align 4
255 store volatile i32 0, ptr %i, align 4
256 %i.0.i.0.6 = load volatile i32, ptr %i, align 4
; Attribute group #0 is attached to every test function in this file; it sets
; "probe-stack" to "inline-asm", the setting under which the stack probes
; expected by the assertions are emitted.
262 attributes #0 = { "probe-stack"="inline-asm" }