1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 -verify-machineinstrs | FileCheck %s
4 ; Test stack clash protection probing for static allocas.
; fun0: allocates 100 x i32 (400 bytes), volatile-stores 1 to element 98 and
; volatile-loads element 0. The expected code lowers %r15 by 560 in one step
; and follows it with a single `cg` at 552(%r15), i.e. the highest doubleword
; of the newly allocated area, before the store/load and the matching +560.
7 define i32 @fun0() #0 {
10 ; CHECK-NEXT: aghi %r15, -560
11 ; CHECK-NEXT: .cfi_def_cfa_offset 720
12 ; CHECK-NEXT: cg %r0, 552(%r15)
13 ; CHECK-NEXT: mvhi 552(%r15), 1
14 ; CHECK-NEXT: l %r2, 160(%r15)
15 ; CHECK-NEXT: aghi %r15, 560
18 %a = alloca i32, i64 100
; Element 98 is byte offset 392 from %a; the assertions expect %a at
; 160(%r15), hence the store to 552(%r15).
19 %b = getelementptr inbounds i32, i32* %a, i64 98
20 store volatile i32 1, i32* %b
21 %c = load volatile i32, i32* %a
; fun1: allocates 2000 x i32 (8000 bytes), volatile-stores 1 to element 200
; and volatile-loads element 0. The expected code allocates the 8176-byte
; frame in two straight-line steps (4096, then 4080), each followed by its
; own `cg` on the highest doubleword of the step just allocated.
26 define i32 @fun1() #0 {
29 ; CHECK-NEXT: aghi %r15, -4096
30 ; CHECK-NEXT: .cfi_def_cfa_offset 4256
31 ; CHECK-NEXT: cg %r0, 4088(%r15)
32 ; CHECK-NEXT: aghi %r15, -4080
33 ; CHECK-NEXT: .cfi_def_cfa_offset 8336
34 ; CHECK-NEXT: cg %r0, 4072(%r15)
35 ; CHECK-NEXT: mvhi 976(%r15), 1
36 ; CHECK-NEXT: l %r2, 176(%r15)
37 ; CHECK-NEXT: aghi %r15, 8176
40 %a = alloca i32, i64 2000
; Element 200 is byte offset 800 from %a; with %a expected at 176(%r15) the
; store lands at 976(%r15).
41 %b = getelementptr inbounds i32, i32* %a, i64 200
42 store volatile i32 1, i32* %b
43 %c = load volatile i32, i32* %a
47 ; Large: Use a loop to allocate and probe in steps.
; fun2: allocates 18000 x i32 (72000 bytes), volatile-stores 1 to elements 98
; and 7198, and volatile-loads element 0. The expected code computes a limit
; in %r0 (incoming %r15 - 69632 = 17 * 4096), then loops at .LBB2_1 lowering
; %r15 by 4096 and issuing a `cg` per iteration while %r15 is still above
; %r0. The remaining 2544 bytes are allocated after the loop with one more
; `cg`, giving a 72176-byte frame in total.
48 define i32 @fun2() #0 {
51 ; CHECK-NEXT: lgr %r0, %r15
52 ; CHECK-NEXT: .cfi_def_cfa_register %r0
53 ; CHECK-NEXT: agfi %r0, -69632
54 ; CHECK-NEXT: .cfi_def_cfa_offset 69792
55 ; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
56 ; CHECK-NEXT: aghi %r15, -4096
57 ; CHECK-NEXT: cg %r0, 4088(%r15)
58 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB2_1
59 ; CHECK-NEXT: # %bb.2:
60 ; CHECK-NEXT: .cfi_def_cfa_register %r15
61 ; CHECK-NEXT: aghi %r15, -2544
62 ; CHECK-NEXT: .cfi_def_cfa_offset 72336
63 ; CHECK-NEXT: cg %r0, 2536(%r15)
64 ; CHECK-NEXT: lhi %r0, 1
65 ; CHECK-NEXT: mvhi 568(%r15), 1
66 ; CHECK-NEXT: sty %r0, 28968(%r15)
67 ; CHECK-NEXT: l %r2, 176(%r15)
68 ; CHECK-NEXT: agfi %r15, 72176
71 %a = alloca i32, i64 18000
; With %a expected at 176(%r15): element 98 -> 568(%r15) and
; element 7198 -> 28968(%r15); the expected output uses `mvhi` for the
; first store and `lhi` + `sty` for the second.
72 %b0 = getelementptr inbounds i32, i32* %a, i64 98
73 %b1 = getelementptr inbounds i32, i32* %a, i64 7198
74 store volatile i32 1, i32* %b0
75 store volatile i32 1, i32* %b1
76 %c = load volatile i32, i32* %a
80 ; Ends evenly on the step so no remainder needed.
; fun3: allocates a [7122 x i32] array plus one i32, then volatile-stores 0
; to and volatile-loads from the i32. The expected frame is 28672 bytes
; (7 * 4096), so the whole allocation is done by the .LBB3_1 loop (4096 bytes
; and one `cg` per iteration) and no further `aghi`/`cg` pair is expected
; after the loop exits.
81 define void @fun3() #0 {
83 ; CHECK: # %bb.0: # %entry
84 ; CHECK-NEXT: lgr %r0, %r15
85 ; CHECK-NEXT: .cfi_def_cfa_register %r0
86 ; CHECK-NEXT: aghi %r0, -28672
87 ; CHECK-NEXT: .cfi_def_cfa_offset 28832
88 ; CHECK-NEXT: .LBB3_1: # %entry
89 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
90 ; CHECK-NEXT: aghi %r15, -4096
91 ; CHECK-NEXT: cg %r0, 4088(%r15)
92 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB3_1
93 ; CHECK-NEXT: # %bb.2: # %entry
94 ; CHECK-NEXT: .cfi_def_cfa_register %r15
95 ; CHECK-NEXT: mvhi 180(%r15), 0
96 ; CHECK-NEXT: l %r0, 180(%r15)
97 ; CHECK-NEXT: aghi %r15, 28672
100 %stack = alloca [7122 x i32], align 4
101 %i = alloca i32, align 4
; NOTE(review): neither bitcast result is used in the lines visible here.
102 %0 = bitcast [7122 x i32]* %stack to i8*
103 %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
104 store volatile i32 0, i32* %i, align 4
105 %i.0.i.0.6 = load volatile i32, i32* %i, align 4
109 ; Loop with bigger step.
; fun4: same shape as a loop-probed frame, but with "stack-probe-size"="8192"
; and a [8000 x i32] array plus one i32. The expected loop at .LBB4_1 lowers
; %r15 by 8192 per iteration (with a `cg` at 8184(%r15)) over 24576 bytes
; (3 * 8192); the remaining 7608 bytes are allocated after the loop with one
; more `cg`, for a 32184-byte frame.
110 define void @fun4() #0 "stack-probe-size"="8192" {
112 ; CHECK: # %bb.0: # %entry
113 ; CHECK-NEXT: lgr %r0, %r15
114 ; CHECK-NEXT: .cfi_def_cfa_register %r0
115 ; CHECK-NEXT: aghi %r0, -24576
116 ; CHECK-NEXT: .cfi_def_cfa_offset 24736
117 ; CHECK-NEXT: .LBB4_1: # %entry
118 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
119 ; CHECK-NEXT: aghi %r15, -8192
120 ; CHECK-NEXT: cg %r0, 8184(%r15)
121 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB4_1
122 ; CHECK-NEXT: # %bb.2: # %entry
123 ; CHECK-NEXT: .cfi_def_cfa_register %r15
124 ; CHECK-NEXT: aghi %r15, -7608
125 ; CHECK-NEXT: .cfi_def_cfa_offset 32344
126 ; CHECK-NEXT: cg %r0, 7600(%r15)
127 ; CHECK-NEXT: mvhi 180(%r15), 0
128 ; CHECK-NEXT: l %r0, 180(%r15)
129 ; CHECK-NEXT: aghi %r15, 32184
130 ; CHECK-NEXT: br %r14
132 %stack = alloca [8000 x i32], align 4
133 %i = alloca i32, align 4
; NOTE(review): neither bitcast result is used in the lines visible here.
134 %0 = bitcast [8000 x i32]* %stack to i8*
135 %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
136 store volatile i32 0, i32* %i, align 4
137 %i.0.i.0.6 = load volatile i32, i32* %i, align 4
141 ; The probe size is rounded down to a multiple of the stack alignment (4100 -> 4096).
; fun5: requests "stack-probe-size"="4100" for a [1000 x i32] array plus one
; i32. The expected output does not step by 4100: the first allocation step
; is 4096 bytes (with a `cg` at 4088(%r15)), followed by the remaining 88
; bytes and a second `cg`, for a 4184-byte frame allocated without a loop.
142 define void @fun5() #0 "stack-probe-size"="4100" {
144 ; CHECK: # %bb.0: # %entry
145 ; CHECK-NEXT: aghi %r15, -4096
146 ; CHECK-NEXT: .cfi_def_cfa_offset 4256
147 ; CHECK-NEXT: cg %r0, 4088(%r15)
148 ; CHECK-NEXT: aghi %r15, -88
149 ; CHECK-NEXT: .cfi_def_cfa_offset 4344
150 ; CHECK-NEXT: cg %r0, 80(%r15)
151 ; CHECK-NEXT: mvhi 180(%r15), 0
152 ; CHECK-NEXT: l %r0, 180(%r15)
153 ; CHECK-NEXT: aghi %r15, 4184
154 ; CHECK-NEXT: br %r14
156 %stack = alloca [1000 x i32], align 4
157 %i = alloca i32, align 4
; NOTE(review): neither bitcast result is used in the lines visible here.
158 %0 = bitcast [1000 x i32]* %stack to i8*
159 %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
160 store volatile i32 0, i32* %i, align 4
161 %i.0.i.0.6 = load volatile i32, i32* %i, align 4
165 ; The minimum probe size is the stack alignment.
; fun6: requests "stack-probe-size"="5" for the same allocas as fun5
; ([1000 x i32] plus one i32). The expected output uses an 8-byte step
; instead: the .LBB6_1 loop lowers %r15 by 8 and issues a `cg` at 0(%r15) on
; every iteration until the full 4184-byte frame has been allocated.
166 define void @fun6() #0 "stack-probe-size"="5" {
168 ; CHECK: # %bb.0: # %entry
169 ; CHECK-NEXT: lgr %r0, %r15
170 ; CHECK-NEXT: .cfi_def_cfa_register %r0
171 ; CHECK-NEXT: aghi %r0, -4184
172 ; CHECK-NEXT: .cfi_def_cfa_offset 4344
173 ; CHECK-NEXT: .LBB6_1: # %entry
174 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
175 ; CHECK-NEXT: aghi %r15, -8
176 ; CHECK-NEXT: cg %r0, 0(%r15)
177 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB6_1
178 ; CHECK-NEXT: # %bb.2: # %entry
179 ; CHECK-NEXT: .cfi_def_cfa_register %r15
180 ; CHECK-NEXT: mvhi 180(%r15), 0
181 ; CHECK-NEXT: l %r0, 180(%r15)
182 ; CHECK-NEXT: aghi %r15, 4184
183 ; CHECK-NEXT: br %r14
185 %stack = alloca [1000 x i32], align 4
186 %i = alloca i32, align 4
; NOTE(review): neither bitcast result is used in the lines visible here.
187 %0 = bitcast [1000 x i32]* %stack to i8*
188 %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
189 store volatile i32 0, i32* %i, align 4
190 %i.0.i.0.6 = load volatile i32, i32* %i, align 4
194 ; Small with a natural probe (STMG) - needs no extra probe.
; fun7: allocates 950 x i32 (3800 bytes), volatile-stores %v to element 98
; and volatile-loads element 0. The expected prologue saves %r14/%r15 with
; `stmg` at 112(%r15) and then lowers %r15 by 3976; unlike fun8, no `cg` is
; expected after the `aghi`.
; NOTE(review): %v is defined outside the lines visible here; presumably it
; is the result of the call to foo that appears as `brasl` below - confirm.
195 define i32 @fun7() #0 {
198 ; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
199 ; CHECK-NEXT: .cfi_offset %r14, -48
200 ; CHECK-NEXT: .cfi_offset %r15, -40
201 ; CHECK-NEXT: aghi %r15, -3976
202 ; CHECK-NEXT: .cfi_def_cfa_offset 4136
203 ; CHECK-NEXT: brasl %r14, foo@PLT
204 ; CHECK-NEXT: st %r2, 568(%r15)
205 ; CHECK-NEXT: l %r2, 176(%r15)
206 ; CHECK-NEXT: lmg %r14, %r15, 4088(%r15)
207 ; CHECK-NEXT: br %r14
209 %a = alloca i32, i64 950
; Element 98 is byte offset 392 from %a; with %a expected at 176(%r15) the
; store lands at 568(%r15).
210 %b = getelementptr inbounds i32, i32* %a, i64 98
211 store volatile i32 %v, i32* %b
212 %c = load volatile i32, i32* %a
216 ; Medium with an STMG - still needs probing.
; fun8: allocates 952 x i32 (3808 bytes, 8 more than fun7), volatile-stores
; %v to element 200 and volatile-loads element 0. The expected prologue again
; starts with `stmg` at 112(%r15), but here the 3984-byte `aghi` is followed
; by an explicit `cg` at 3976(%r15).
; NOTE(review): %v is defined outside the lines visible here; presumably it
; is the result of the call to foo that appears as `brasl` below - confirm.
217 define i32 @fun8() #0 {
220 ; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
221 ; CHECK-NEXT: .cfi_offset %r14, -48
222 ; CHECK-NEXT: .cfi_offset %r15, -40
223 ; CHECK-NEXT: aghi %r15, -3984
224 ; CHECK-NEXT: .cfi_def_cfa_offset 4144
225 ; CHECK-NEXT: cg %r0, 3976(%r15)
226 ; CHECK-NEXT: brasl %r14, foo@PLT
227 ; CHECK-NEXT: st %r2, 976(%r15)
228 ; CHECK-NEXT: l %r2, 176(%r15)
229 ; CHECK-NEXT: lmg %r14, %r15, 4096(%r15)
230 ; CHECK-NEXT: br %r14
233 %a = alloca i32, i64 952
; Element 200 is byte offset 800 from %a; with %a expected at 176(%r15) the
; store lands at 976(%r15).
234 %b = getelementptr inbounds i32, i32* %a, i64 200
235 store volatile i32 %v, i32* %b
236 %c = load volatile i32, i32* %a
; fun9: same allocas and 28672-byte loop-allocated frame as fun3, but with
; the "backchain" function attribute. The expected output additionally
; copies the incoming %r15 into %r1 before the loop and, once the loop has
; finished, stores %r1 at 0(%r15), i.e. at the bottom of the new frame.
240 define void @fun9() #0 "backchain" {
242 ; CHECK: # %bb.0: # %entry
243 ; CHECK-NEXT: lgr %r1, %r15
244 ; CHECK-NEXT: lgr %r0, %r15
245 ; CHECK-NEXT: .cfi_def_cfa_register %r0
246 ; CHECK-NEXT: aghi %r0, -28672
247 ; CHECK-NEXT: .cfi_def_cfa_offset 28832
248 ; CHECK-NEXT: .LBB9_1: # %entry
249 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
250 ; CHECK-NEXT: aghi %r15, -4096
251 ; CHECK-NEXT: cg %r0, 4088(%r15)
252 ; CHECK-NEXT: clgrjh %r15, %r0, .LBB9_1
253 ; CHECK-NEXT: # %bb.2: # %entry
254 ; CHECK-NEXT: .cfi_def_cfa_register %r15
255 ; CHECK-NEXT: stg %r1, 0(%r15)
256 ; CHECK-NEXT: mvhi 180(%r15), 0
257 ; CHECK-NEXT: l %r0, 180(%r15)
258 ; CHECK-NEXT: aghi %r15, 28672
259 ; CHECK-NEXT: br %r14
261 %stack = alloca [7122 x i32], align 4
262 %i = alloca i32, align 4
; NOTE(review): neither bitcast result is used in the lines visible here.
263 %0 = bitcast [7122 x i32]* %stack to i8*
264 %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
265 store volatile i32 0, i32* %i, align 4
266 %i.0.i.0.6 = load volatile i32, i32* %i, align 4
; Attribute group #0 is referenced by each function visible in this test; it
; sets the "probe-stack" function attribute to "inline-asm".
272 attributes #0 = { "probe-stack"="inline-asm" }