; Test the generated function prologs/epilogs under XPLINK64 on z/OS.

; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck --check-prefixes=CHECK64,CHECK %s

; Test the prolog/epilog of a non-leaf (non-XPLEAF) function.
; CHECK-LABEL: func0
; CHECK64: stmg 6,7,1872(4)
; The stmg instruction's displacement must be 2064 - dsa_size (here 2064 - 192 = 1872).
; CHECK64: aghi 4,-192
; CHECK64: lg 7,2072(4)
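; Note on the XPLINK64 conventions assumed throughout this file: r4 is the
; stack pointer, biased by 2048 bytes, and r7 holds the return address. The
; GPR save slots land at biased offset 2064 of the allocated frame, which is
; why the epilog reloads r7 from 2072(4), the second slot of the r6-r7 pair.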
; CHECK64: L#PPA1_func0_0:
; CHECK64: .short 0 * Length/4 of Parms
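; The PPA1 "Length/4 of Parms" field records the size of the fixed parameter
; area in 4-byte units; func0 takes no arguments, so the field is 0.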
define void @func0() {
  call i64 (i64) @fun(i64 10)
  ret void
}

; CHECK-LABEL: func1
; CHECK64: stmg 6,15,1904(4)
; CHECK64: aghi 4,-160
; CHECK64: lmg 7,15,2072(4)
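; func1 clobbers every callee-saved GPR, so the prolog stores the full r6-r15
; range at 2064 - 160 = 1904 and the epilog restores r7-r15, including the
; return address in r7.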
; CHECK64: L#PPA1_func1_0:
; CHECK64: .short 2 * Length/4 of Parms
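; One 8-byte pointer parameter gives a parameter area of 8 bytes, i.e. 8/4 = 2.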
define void @func1(ptr %ptr) {
  %l01 = load volatile i64, ptr %ptr
  %l02 = load volatile i64, ptr %ptr
  %l03 = load volatile i64, ptr %ptr
  %l04 = load volatile i64, ptr %ptr
  %l05 = load volatile i64, ptr %ptr
  %l06 = load volatile i64, ptr %ptr
  %l07 = load volatile i64, ptr %ptr
  %l08 = load volatile i64, ptr %ptr
  %l09 = load volatile i64, ptr %ptr
  %l10 = load volatile i64, ptr %ptr
  %l11 = load volatile i64, ptr %ptr
  %l12 = load volatile i64, ptr %ptr
  %l13 = load volatile i64, ptr %ptr
  %l14 = load volatile i64, ptr %ptr
  %l15 = load volatile i64, ptr %ptr
  %add01 = add i64 %l01, %l01
  %add02 = add i64 %l02, %add01
  %add03 = add i64 %l03, %add02
  %add04 = add i64 %l04, %add03
  %add05 = add i64 %l05, %add04
  %add06 = add i64 %l06, %add05
  %add07 = add i64 %l07, %add06
  %add08 = add i64 %l08, %add07
  %add09 = add i64 %l09, %add08
  %add10 = add i64 %l10, %add09
  %add11 = add i64 %l11, %add10
  %add12 = add i64 %l12, %add11
  %add13 = add i64 %l13, %add12
  %add14 = add i64 %l14, %add13
  %add15 = add i64 %l15, %add14
  store volatile i64 %add01, ptr %ptr
  store volatile i64 %add02, ptr %ptr
  store volatile i64 %add03, ptr %ptr
  store volatile i64 %add04, ptr %ptr
  store volatile i64 %add05, ptr %ptr
  store volatile i64 %add06, ptr %ptr
  store volatile i64 %add07, ptr %ptr
  store volatile i64 %add08, ptr %ptr
  store volatile i64 %add09, ptr %ptr
  store volatile i64 %add10, ptr %ptr
  store volatile i64 %add11, ptr %ptr
  store volatile i64 %add12, ptr %ptr
  store volatile i64 %add13, ptr %ptr
  store volatile i64 %add14, ptr %ptr
  store volatile i64 %add15, ptr %ptr
  ret void
}

; Spill all FPRs and VRs.

; CHECK-LABEL: func2
; CHECK64: stmg 6,7,1744(4)
; CHECK64: aghi 4,-320
; CHECK64: std 15,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 14,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 13,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 12,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 11,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 10,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 9,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 8,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: vst 23,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 22,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 21,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 20,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 19,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 18,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 17,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 16,{{[0-9]+}}(4),4 * 16-byte Folded Spill
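; f8-f15 and v16-v23 are the callee-saved FP/vector registers under XPLINK64,
; so clobbering all of them forces eight std and eight vst spills into the
; enlarged 320-byte DSA; the r6-r7 pair is stored at 2064 - 320 = 1744 before
; the stack pointer is bumped.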
; CHECK64: ld 15,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 14,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 13,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 12,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 11,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 10,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 9,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 8,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: vl 23,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 22,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 21,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 20,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 19,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 18,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 17,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 16,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: lg 7,2072(4)
; CHECK64: aghi 4,320
define void @func2(ptr %ptr, ptr %vec_ptr) {
  %l00 = load volatile double, ptr %ptr
  %l01 = load volatile double, ptr %ptr
  %l02 = load volatile double, ptr %ptr
  %l03 = load volatile double, ptr %ptr
  %l04 = load volatile double, ptr %ptr
  %l05 = load volatile double, ptr %ptr
  %l06 = load volatile double, ptr %ptr
  %l07 = load volatile double, ptr %ptr
  %l08 = load volatile double, ptr %ptr
  %l09 = load volatile double, ptr %ptr
  %l10 = load volatile double, ptr %ptr
  %l11 = load volatile double, ptr %ptr
  %l12 = load volatile double, ptr %ptr
  %l13 = load volatile double, ptr %ptr
  %l14 = load volatile double, ptr %ptr
  %l15 = load volatile double, ptr %ptr
  %add00 = fadd double %l01, %l00
  %add01 = fadd double %l01, %add00
  %add02 = fadd double %l02, %add01
  %add03 = fadd double %l03, %add02
  %add04 = fadd double %l04, %add03
  %add05 = fadd double %l05, %add04
  %add06 = fadd double %l06, %add05
  %add07 = fadd double %l07, %add06
  %add08 = fadd double %l08, %add07
  %add09 = fadd double %l09, %add08
  %add10 = fadd double %l10, %add09
  %add11 = fadd double %l11, %add10
  %add12 = fadd double %l12, %add11
  %add13 = fadd double %l13, %add12
  %add14 = fadd double %l14, %add13
  %add15 = fadd double %l15, %add14
  store volatile double %add00, ptr %ptr
  store volatile double %add01, ptr %ptr
  store volatile double %add02, ptr %ptr
  store volatile double %add03, ptr %ptr
  store volatile double %add04, ptr %ptr
  store volatile double %add05, ptr %ptr
  store volatile double %add06, ptr %ptr
  store volatile double %add07, ptr %ptr
  store volatile double %add08, ptr %ptr
  store volatile double %add09, ptr %ptr
  store volatile double %add10, ptr %ptr
  store volatile double %add11, ptr %ptr
  store volatile double %add12, ptr %ptr
  store volatile double %add13, ptr %ptr
  store volatile double %add14, ptr %ptr
  store volatile double %add15, ptr %ptr

  %v00 = load volatile <2 x i64>, ptr %vec_ptr
  %v01 = load volatile <2 x i64>, ptr %vec_ptr
  %v02 = load volatile <2 x i64>, ptr %vec_ptr
  %v03 = load volatile <2 x i64>, ptr %vec_ptr
  %v04 = load volatile <2 x i64>, ptr %vec_ptr
  %v05 = load volatile <2 x i64>, ptr %vec_ptr
  %v06 = load volatile <2 x i64>, ptr %vec_ptr
  %v07 = load volatile <2 x i64>, ptr %vec_ptr
  %v08 = load volatile <2 x i64>, ptr %vec_ptr
  %v09 = load volatile <2 x i64>, ptr %vec_ptr
  %v10 = load volatile <2 x i64>, ptr %vec_ptr
  %v11 = load volatile <2 x i64>, ptr %vec_ptr
  %v12 = load volatile <2 x i64>, ptr %vec_ptr
  %v13 = load volatile <2 x i64>, ptr %vec_ptr
  %v14 = load volatile <2 x i64>, ptr %vec_ptr
  %v15 = load volatile <2 x i64>, ptr %vec_ptr
  %v16 = load volatile <2 x i64>, ptr %vec_ptr
  %v17 = load volatile <2 x i64>, ptr %vec_ptr
  %v18 = load volatile <2 x i64>, ptr %vec_ptr
  %v19 = load volatile <2 x i64>, ptr %vec_ptr
  %v20 = load volatile <2 x i64>, ptr %vec_ptr
  %v21 = load volatile <2 x i64>, ptr %vec_ptr
  %v22 = load volatile <2 x i64>, ptr %vec_ptr
  %v23 = load volatile <2 x i64>, ptr %vec_ptr
  %v24 = load volatile <2 x i64>, ptr %vec_ptr
  %v25 = load volatile <2 x i64>, ptr %vec_ptr
  %v26 = load volatile <2 x i64>, ptr %vec_ptr
  %v27 = load volatile <2 x i64>, ptr %vec_ptr
  %v28 = load volatile <2 x i64>, ptr %vec_ptr
  %v29 = load volatile <2 x i64>, ptr %vec_ptr
  %v30 = load volatile <2 x i64>, ptr %vec_ptr
  %v31 = load volatile <2 x i64>, ptr %vec_ptr
  %vadd00 = add <2 x i64> %v00, %v00
  %vadd01 = add <2 x i64> %v01, %vadd00
  %vadd02 = add <2 x i64> %v02, %vadd01
  %vadd03 = add <2 x i64> %v03, %vadd02
  %vadd04 = add <2 x i64> %v04, %vadd03
  %vadd05 = add <2 x i64> %v05, %vadd04
  %vadd06 = add <2 x i64> %v06, %vadd05
  %vadd07 = add <2 x i64> %v07, %vadd06
  %vadd08 = add <2 x i64> %v08, %vadd07
  %vadd09 = add <2 x i64> %v09, %vadd08
  %vadd10 = add <2 x i64> %v10, %vadd09
  %vadd11 = add <2 x i64> %v11, %vadd10
  %vadd12 = add <2 x i64> %v12, %vadd11
  %vadd13 = add <2 x i64> %v13, %vadd12
  %vadd14 = add <2 x i64> %v14, %vadd13
  %vadd15 = add <2 x i64> %v15, %vadd14
  %vadd16 = add <2 x i64> %v16, %vadd15
  %vadd17 = add <2 x i64> %v17, %vadd16
  %vadd18 = add <2 x i64> %v18, %vadd17
  %vadd19 = add <2 x i64> %v19, %vadd18
  %vadd20 = add <2 x i64> %v20, %vadd19
  %vadd21 = add <2 x i64> %v21, %vadd20
  %vadd22 = add <2 x i64> %v22, %vadd21
  %vadd23 = add <2 x i64> %v23, %vadd22
  %vadd24 = add <2 x i64> %v24, %vadd23
  %vadd25 = add <2 x i64> %v25, %vadd24
  %vadd26 = add <2 x i64> %v26, %vadd25
  %vadd27 = add <2 x i64> %v27, %vadd26
  %vadd28 = add <2 x i64> %v28, %vadd27
  %vadd29 = add <2 x i64> %v29, %vadd28
  %vadd30 = add <2 x i64> %v30, %vadd29
  %vadd31 = add <2 x i64> %v31, %vadd30
  store volatile <2 x i64> %vadd00, ptr %vec_ptr
  store volatile <2 x i64> %vadd01, ptr %vec_ptr
  store volatile <2 x i64> %vadd02, ptr %vec_ptr
  store volatile <2 x i64> %vadd03, ptr %vec_ptr
  store volatile <2 x i64> %vadd04, ptr %vec_ptr
  store volatile <2 x i64> %vadd05, ptr %vec_ptr
  store volatile <2 x i64> %vadd06, ptr %vec_ptr
  store volatile <2 x i64> %vadd07, ptr %vec_ptr
  store volatile <2 x i64> %vadd08, ptr %vec_ptr
  store volatile <2 x i64> %vadd09, ptr %vec_ptr
  store volatile <2 x i64> %vadd10, ptr %vec_ptr
  store volatile <2 x i64> %vadd11, ptr %vec_ptr
  store volatile <2 x i64> %vadd12, ptr %vec_ptr
  store volatile <2 x i64> %vadd13, ptr %vec_ptr
  store volatile <2 x i64> %vadd14, ptr %vec_ptr
  store volatile <2 x i64> %vadd15, ptr %vec_ptr
  store volatile <2 x i64> %vadd16, ptr %vec_ptr
  store volatile <2 x i64> %vadd17, ptr %vec_ptr
  store volatile <2 x i64> %vadd18, ptr %vec_ptr
  store volatile <2 x i64> %vadd19, ptr %vec_ptr
  store volatile <2 x i64> %vadd20, ptr %vec_ptr
  store volatile <2 x i64> %vadd21, ptr %vec_ptr
  store volatile <2 x i64> %vadd22, ptr %vec_ptr
  store volatile <2 x i64> %vadd23, ptr %vec_ptr
  store volatile <2 x i64> %vadd24, ptr %vec_ptr
  store volatile <2 x i64> %vadd25, ptr %vec_ptr
  store volatile <2 x i64> %vadd26, ptr %vec_ptr
  store volatile <2 x i64> %vadd27, ptr %vec_ptr
  store volatile <2 x i64> %vadd28, ptr %vec_ptr
  store volatile <2 x i64> %vadd29, ptr %vec_ptr
  store volatile <2 x i64> %vadd30, ptr %vec_ptr
  store volatile <2 x i64> %vadd31, ptr %vec_ptr
  ret void
}

; Big stack frame: forces the use of agfi before stmg, even though the
; stack extension routine is not required.

; CHECK-LABEL: func3
; CHECK64: agfi 4,-1040768
; CHECK64: stmg 6,7,2064(4)
; CHECK64: agfi 4,1040768
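; The 130070 x i64 array needs 1040560 bytes; with the register save slots and
; alignment padding the frame comes to 1040768 bytes, which does not fit in
; aghi's 16-bit signed immediate (max 32767), so the 32-bit agfi is used.
; Because the whole adjustment happens before the store, the stmg can use the
; fixed biased offset 2064 directly.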
define void @func3() {
  %arr = alloca [130070 x i64], align 8
  call i64 (ptr) @fun1(ptr %arr)
  ret void
}

; Requires saving r4 because the stack frame contains a variable-sized object
; (e.g. a VLA); the frame pointer is set up in r8.

; CHECK-LABEL: func4
; CHECK64: stmg 4,10,1856(4)
; CHECK64: aghi 4,-192
; CHECK64: lg 6,40(5)
; CHECK64: lg 5,32(5)
; CHECK64: basr 7,6
; CHECK64-NEXT: bcr 0,0
; CHECK64: lmg 4,10,2048(4)
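; The lg 6,40(5) / lg 5,32(5) pair loads the callee's entry point into r6 and
; its environment into r5 from the XPLINK function descriptor (that reading of
; the descriptor layout is an assumption; the offsets themselves come from the
; checked output). The call is basr 7,6 followed by the XPLINK NOP bcr 0,0.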
define i64 @func4(i64 %n) {
  %vla = alloca i64, i64 %n, align 8
  %call = call i64 @fun2(i64 %n, ptr nonnull %vla, ptr nonnull %vla)
  ret i64 %call
}

; Requires saving r4 and, in addition, a displacement large enough to force
; the use of agfi before stmg.

; CHECK-LABEL: func5
; CHECK64: agfi 4,-1040224
; CHECK64: stmg 4,10,2048(4)
; CHECK64: basr 7,6
; CHECK64-NEXT: bcr 0,0
; CHECK64: lmg 4,10,2048(4)
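; 130000 x i64 needs 1040000 bytes; with the save area the frame is 1040224
; bytes, again beyond aghi's 16-bit immediate range. With r4 itself saved, the
; stmg covers r4-r10 and lands at the bottom of the save area, biased 2048.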
define i64 @func5(i64 %n) {
  %vla = alloca i64, i64 %n, align 8
  %arr = alloca [130000 x i64], align 8
  %call = call i64 @fun2(i64 %n, ptr nonnull %vla, ptr %arr)
  ret i64 %call
}

; CHECK-LABEL: large_stack0
; CHECK64: agfi 4,-1048800
; CHECK64-NEXT: llgt 3,1208
; CHECK64-NEXT: cg 4,64(3)
; CHECK64: lg 3,72(3)
; CHECK64: stmg 6,7,2064(4)
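; Frames this large must check for stack overflow before committing the
; allocation: llgt 3,1208 fetches the LE CAA pointer from its fixed low-memory
; slot, cg compares the new stack pointer against the stack floor held in the
; CAA (the interpretation of fields 64 and 72 is an assumption; the offsets
; follow the checked output), and if the check fails the extension routine
; loaded from the CAA is called.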
define void @large_stack0() {
  %arr = alloca [131072 x i64], align 8
  call i64 (ptr) @fun1(ptr %arr)
  ret void
}

; CHECK-LABEL: large_stack1
; CHECK64: agfi 4,-1048800
; CHECK64: llgt 3,1208
; CHECK64: cg 4,64(3)
; CHECK64: jhe L#BB7_2
; CHECK64: lg 3,72(3)
; CHECK64: stmg 6,7,2064(4)
; CHECK64: L#PPA1_large_stack1_0:
; CHECK64: .short 6 * Length/4 of Parms
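; Three i64 parameters give a 24-byte parameter area, i.e. 24/4 = 6.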
define void @large_stack1(i64 %n1, i64 %n2, i64 %n3) {
  %arr = alloca [131072 x i64], align 8
  call i64 (ptr, i64, i64, i64) @fun3(ptr %arr,
                                      i64 %n1, i64 %n2, i64 %n3)
  ret void
}

; CHECK-LABEL: large_stack2
; CHECK64: stg 3,2192(4)
; CHECK64: agfi 4,-1048800
; CHECK64: llgt 3,1208
; CHECK64: cg 4,64(3)
; CHECK64: jhe L#BB8_2
; CHECK64: lg 3,72(3)
; CHECK64: lg 3,2192(3)
; CHECK64: stmg 4,12,2048(4)
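; r3 (the third argument, %n3) is clobbered by the overflow-check sequence, so
; it is parked at biased offset 2192 (presumably its slot in the caller-
; allocated argument area) before the agfi and reloaded once the check is done.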
define void @large_stack2(i64 %n1, i64 %n2, i64 %n3) {
  %arr0 = alloca [131072 x i64], align 8
  %arr1 = alloca i64, i64 %n1, align 8
  call i64 (ptr, ptr, i64, i64, i64) @fun4(ptr %arr0,
                                           ptr %arr1, i64 %n1, i64 %n2, i64 %n3)
  ret void
}

; CHECK-LABEL: leaf_func0
; CHECK: .long 8 * DSA Size 0x0
; CHECK-NEXT: * Entry Flags
; CHECK-NEXT: * Bit 1: 1 = Leaf function
; CHECK-NEXT: * Bit 2: 0 = Does not use alloca
; CHECK: aghik 3,1,-4
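; A leaf function allocates no DSA and leaves r4 untouched. Under XPLINK64 the
; first GPR arguments arrive in r1-r3 and the result is returned in r3, so
; aghik 3,1,-4 computes the result straight into the return register.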
define i64 @leaf_func0(i64 %a, i64 %b, i64 %c) {
  ; Minimal reconstructed body matching the aghik check above (assumption):
  ; return %a - 4, computed directly into r3.
  %sub = add i64 %a, -4
  ret i64 %sub
}

; =============================
; Tests for PPA1 Fields
; =============================

; CHECK-LABEL: named_func
; CHECK: .byte 129 * PPA1 Flags 4
; CHECK-NEXT: * Bit 7: 1 = Name Length and Name
define i64 @named_func(i64 %arg) {
  %sum = add i64 1, %arg
  ret i64 %sum
}

; CHECK-LABEL: __unnamed_1
; CHECK: .byte 128 * PPA1 Flags 4
; CHECK-NOT: * Bit 7: 1 = Name Length and Name
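; Flags-4 byte 129 = 128 + 1: the low-order bit (bit 7) marks that the name
; length and name follow in the PPA1, so the anonymous function tested here
; gets 128 and emits no name field.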
define void @""(ptr %p) {
  call i64 (ptr) @fun1(ptr %p)
  ret void
}

declare i64 @fun(i64 %arg0)
declare i64 @fun1(ptr %ptr)
declare i64 @fun2(i64 %n, ptr %arr0, ptr %arr1)
declare i64 @fun3(ptr %ptr, i64 %n1, i64 %n2, i64 %n3)
declare i64 @fun4(ptr %ptr0, ptr %ptr1, i64 %n1, i64 %n2, i64 %n3)