1 ; Test subtraction of a zero-extended i32 from an i64.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
8 define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
; Register form: %b is zero-extended to i64 and subtracted from %a through the
; unsigned-subtract-with-overflow intrinsic; the difference is stored via %res.
; Expected output: one slgfr, followed (in any relative order) by the store of
; the difference and an ipm/afi/risbg sequence whose result lands in %r2.
; -536870912 is -(2^29).
; NOTE(review): the ipm/afi/risbg sequence presumably converts the condition
; code into the i1 overflow result -- confirm against the SystemZ backend.
10 ; CHECK: slgfr %r3, %r4
11 ; CHECK-DAG: stg %r3, 0(%r5)
12 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
13 ; CHECK-DAG: afi [[REG]], -536870912
14 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; Widen the 32-bit operand without sign extension.
16 %bext = zext i32 %b to i64
17 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
18 %val = extractvalue {i64, i1} %t, 0
; The consumer of %obit is not visible in this view.
19 %obit = extractvalue {i64, i1} %t, 1
20 store i64 %val, i64 *%res
24 ; Check using the overflow result for a branch.
25 define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
; Same subtraction as the register-form test, but the overflow flag drives a
; conditional branch instead of being turned into a value.
; Expected output: slgfr followed by the store of the difference.
; NOTE(review): unlike @f3, no expected branch instruction is visible for this
; function in this view -- confirm whether one is intended here.
27 ; CHECK: slgfr %r3, %r4
28 ; CHECK: stg %r3, 0(%r5)
; Widen the 32-bit operand without sign extension.
31 %bext = zext i32 %b to i64
32 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
33 %val = extractvalue {i64, i1} %t, 0
34 %obit = extractvalue {i64, i1} %t, 1
35 store i64 %val, i64 *%res
; Overflow goes to %call, no overflow goes to %exit (neither block is visible
; in this view).
36 br i1 %obit, label %call, label %exit
46 ; ... and the same with the inverted direction.
47 define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
; Same subtraction and store as @f2, with the two branch targets swapped.
; Expected output: slgfr, the store of the difference, then a conditional
; jump (jgnle) to foo through the PLT.
49 ; CHECK: slgfr %r3, %r4
50 ; CHECK: stg %r3, 0(%r5)
51 ; CHECK: jgnle foo@PLT
; Widen the 32-bit operand without sign extension.
53 %bext = zext i32 %b to i64
54 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
55 %val = extractvalue {i64, i1} %t, 0
56 %obit = extractvalue {i64, i1} %t, 1
57 store i64 %val, i64 *%res
; Overflow goes to %exit, no overflow goes to %call (neither block is visible
; in this view).
58 br i1 %obit, label %exit, label %call
68 ; Check SLGF with no displacement.
69 define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; Memory form: the i32 operand is loaded straight from %src, zero-extended and
; subtracted from %a; the difference is stored via %res.
; Expected output: a single slgf with displacement 0, i.e. the load and the
; zero extension are folded into the subtract, followed (in any relative
; order) by the store and the ipm/afi/risbg sequence ending in %r2.
71 ; CHECK: slgf %r3, 0(%r4)
72 ; CHECK-DAG: stg %r3, 0(%r5)
73 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
74 ; CHECK-DAG: afi [[REG]], -536870912
75 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
77 %b = load i32, i32 *%src
78 %bext = zext i32 %b to i64
79 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
80 %val = extractvalue {i64, i1} %t, 0
; The consumer of %obit is not visible in this view.
81 %obit = extractvalue {i64, i1} %t, 1
82 store i64 %val, i64 *%res
86 ; Check the high end of the aligned SLGF range.
87 define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; Memory form with a large positive offset: the operand is element 131071 of
; the i32 array at %src, i.e. byte offset 131071 * 4 = 524284.
; Expected output: that offset appears directly as the slgf displacement, with
; no separate address computation.
89 ; CHECK: slgf %r3, 524284(%r4)
90 ; CHECK-DAG: stg %r3, 0(%r5)
91 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
92 ; CHECK-DAG: afi [[REG]], -536870912
93 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
95 %ptr = getelementptr i32, i32 *%src, i64 131071
96 %b = load i32, i32 *%ptr
97 %bext = zext i32 %b to i64
98 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
99 %val = extractvalue {i64, i1} %t, 0
; The consumer of %obit is not visible in this view.
100 %obit = extractvalue {i64, i1} %t, 1
101 store i64 %val, i64 *%res
105 ; Check the next word up, which needs separate address logic.
106 ; Other sequences besides this one would be OK.
107 define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; The operand is element 131072 of the i32 array at %src, i.e. byte offset
; 131072 * 4 = 524288, one element beyond the offset used in @f5.
; Expected output: the offset is first added to the address register with
; agfi, and the slgf then uses displacement 0.
109 ; CHECK: agfi %r4, 524288
110 ; CHECK: slgf %r3, 0(%r4)
111 ; CHECK-DAG: stg %r3, 0(%r5)
112 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
113 ; CHECK-DAG: afi [[REG]], -536870912
114 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
116 %ptr = getelementptr i32, i32 *%src, i64 131072
117 %b = load i32, i32 *%ptr
118 %bext = zext i32 %b to i64
119 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
120 %val = extractvalue {i64, i1} %t, 0
; The consumer of %obit is not visible in this view.
121 %obit = extractvalue {i64, i1} %t, 1
122 store i64 %val, i64 *%res
126 ; Check the high end of the negative aligned SLGF range.
127 define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; Memory form with a small negative offset: the operand is element -1 of the
; i32 array at %src, i.e. byte offset -4.
; Expected output: -4 appears directly as the slgf displacement.
129 ; CHECK: slgf %r3, -4(%r4)
130 ; CHECK-DAG: stg %r3, 0(%r5)
131 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
132 ; CHECK-DAG: afi [[REG]], -536870912
133 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
135 %ptr = getelementptr i32, i32 *%src, i64 -1
136 %b = load i32, i32 *%ptr
137 %bext = zext i32 %b to i64
138 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
139 %val = extractvalue {i64, i1} %t, 0
; The consumer of %obit is not visible in this view.
140 %obit = extractvalue {i64, i1} %t, 1
141 store i64 %val, i64 *%res
145 ; Check the low end of the SLGF range.
146 define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; Memory form with a large negative offset: the operand is element -131072 of
; the i32 array at %src, i.e. byte offset -131072 * 4 = -524288.
; Expected output: that offset appears directly as the slgf displacement, with
; no separate address computation.
148 ; CHECK: slgf %r3, -524288(%r4)
149 ; CHECK-DAG: stg %r3, 0(%r5)
150 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
151 ; CHECK-DAG: afi [[REG]], -536870912
152 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
154 %ptr = getelementptr i32, i32 *%src, i64 -131072
155 %b = load i32, i32 *%ptr
156 %bext = zext i32 %b to i64
157 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
158 %val = extractvalue {i64, i1} %t, 0
; The consumer of %obit is not visible in this view.
159 %obit = extractvalue {i64, i1} %t, 1
160 store i64 %val, i64 *%res
164 ; Check the next word down, which needs separate address logic.
165 ; Other sequences besides this one would be OK.
166 define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; The operand is element -131073 of the i32 array at %src, i.e. byte offset
; -131073 * 4 = -524292, one element below the offset used in @f8.
; Expected output: the offset is first added to the address register with
; agfi, and the slgf then uses displacement 0.
168 ; CHECK: agfi %r4, -524292
169 ; CHECK: slgf %r3, 0(%r4)
170 ; CHECK-DAG: stg %r3, 0(%r5)
171 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
172 ; CHECK-DAG: afi [[REG]], -536870912
173 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
175 %ptr = getelementptr i32, i32 *%src, i64 -131073
176 %b = load i32, i32 *%ptr
177 %bext = zext i32 %b to i64
178 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
179 %val = extractvalue {i64, i1} %t, 0
; The consumer of %obit is not visible in this view.
180 %obit = extractvalue {i64, i1} %t, 1
181 store i64 %val, i64 *%res
185 ; Check that SLGF allows an index.
186 define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) {
; Indexed memory form: the operand address is %src + %index + 524284, built
; with integer adds and converted to a pointer.
; Expected output: one slgf that carries both registers and the 524284
; displacement in its address operand; either ordering of the two address
; registers is accepted.
188 ; CHECK: slgf %r4, 524284({{%r3,%r2|%r2,%r3}})
189 ; CHECK-DAG: stg %r4, 0(%r5)
190 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
191 ; CHECK-DAG: afi [[REG]], -536870912
192 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
194 %add1 = add i64 %src, %index
195 %add2 = add i64 %add1, 524284
196 %ptr = inttoptr i64 %add2 to i32 *
197 %b = load i32, i32 *%ptr
198 %bext = zext i32 %b to i64
199 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext)
; Field 0 is the difference, field 1 the overflow flag.
200 %val = extractvalue {i64, i1} %t, 0
; The consumer of %obit is not visible in this view.
201 %obit = extractvalue {i64, i1} %t, 1
202 store i64 %val, i64 *%res
206 ; Check that subtractions of spilled values can use SLGF rather than SLGFR.
207 define zeroext i1 @f11(i32 *%ptr0) {
; Spill test: ten i32 values are kept live across a call to @foo and then each
; is zero-extended and subtracted from a running i64 total.
; Expected output: after the call (brasl), at least one subtraction is an slgf
; that reads its operand from 160(%r15) instead of a register.
; NOTE(review): 160(%r15) is presumably a spill slot in the stack frame --
; confirm against the s390x ABI.
209 ; CHECK: brasl %r14, foo@PLT
210 ; CHECK: slgf {{%r[0-9]+}}, 160(%r15)
; Addresses of every second i32 element starting at %ptr0 (indices 2 to 18).
212 %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
213 %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
214 %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
215 %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
216 %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
217 %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
218 %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
219 %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
220 %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
; Load the ten input values.
222 %val0 = load i32, i32 *%ptr0
223 %val1 = load i32, i32 *%ptr1
224 %val2 = load i32, i32 *%ptr2
225 %val3 = load i32, i32 *%ptr3
226 %val4 = load i32, i32 *%ptr4
227 %val5 = load i32, i32 *%ptr5
228 %val6 = load i32, i32 *%ptr6
229 %val7 = load i32, i32 *%ptr7
230 %val8 = load i32, i32 *%ptr8
231 %val9 = load i32, i32 *%ptr9
; Add 100 to each value; these modified values are what is used after the
; call, so they are not simply the contents of the original loads.
233 %frob0 = add i32 %val0, 100
234 %frob1 = add i32 %val1, 100
235 %frob2 = add i32 %val2, 100
236 %frob3 = add i32 %val3, 100
237 %frob4 = add i32 %val4, 100
238 %frob5 = add i32 %val5, 100
239 %frob6 = add i32 %val6, 100
240 %frob7 = add i32 %val7, 100
241 %frob8 = add i32 %val8, 100
242 %frob9 = add i32 %val9, 100
; Write the modified values back to the locations they were loaded from.
244 store i32 %frob0, i32 *%ptr0
245 store i32 %frob1, i32 *%ptr1
246 store i32 %frob2, i32 *%ptr2
247 store i32 %frob3, i32 *%ptr3
248 store i32 %frob4, i32 *%ptr4
249 store i32 %frob5, i32 *%ptr5
250 store i32 %frob6, i32 *%ptr6
251 store i32 %frob7, i32 *%ptr7
252 store i32 %frob8, i32 *%ptr8
253 store i32 %frob9, i32 *%ptr9
; The call whose result starts the subtraction chain; all ten %frob values
; are still needed afterwards.
255 %ret = call i64 @foo()
; Zero-extend each 32-bit value to 64 bits.
257 %ext0 = zext i32 %frob0 to i64
258 %ext1 = zext i32 %frob1 to i64
259 %ext2 = zext i32 %frob2 to i64
260 %ext3 = zext i32 %frob3 to i64
261 %ext4 = zext i32 %frob4 to i64
262 %ext5 = zext i32 %frob5 to i64
263 %ext6 = zext i32 %frob6 to i64
264 %ext7 = zext i32 %frob7 to i64
265 %ext8 = zext i32 %frob8 to i64
266 %ext9 = zext i32 %frob9 to i64
; Chain of ten overflow-checked subtractions starting from %ret.  Despite the
; %addN names, each %addN holds the running difference.  %resN accumulates
; the OR of all overflow flags seen so far.
268 %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %ret, i64 %ext0)
269 %add0 = extractvalue {i64, i1} %t0, 0
270 %obit0 = extractvalue {i64, i1} %t0, 1
271 %t1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add0, i64 %ext1)
272 %add1 = extractvalue {i64, i1} %t1, 0
273 %obit1 = extractvalue {i64, i1} %t1, 1
274 %res1 = or i1 %obit0, %obit1
275 %t2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add1, i64 %ext2)
276 %add2 = extractvalue {i64, i1} %t2, 0
277 %obit2 = extractvalue {i64, i1} %t2, 1
278 %res2 = or i1 %res1, %obit2
279 %t3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add2, i64 %ext3)
280 %add3 = extractvalue {i64, i1} %t3, 0
281 %obit3 = extractvalue {i64, i1} %t3, 1
282 %res3 = or i1 %res2, %obit3
283 %t4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add3, i64 %ext4)
284 %add4 = extractvalue {i64, i1} %t4, 0
285 %obit4 = extractvalue {i64, i1} %t4, 1
286 %res4 = or i1 %res3, %obit4
287 %t5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add4, i64 %ext5)
288 %add5 = extractvalue {i64, i1} %t5, 0
289 %obit5 = extractvalue {i64, i1} %t5, 1
290 %res5 = or i1 %res4, %obit5
291 %t6 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add5, i64 %ext6)
292 %add6 = extractvalue {i64, i1} %t6, 0
293 %obit6 = extractvalue {i64, i1} %t6, 1
294 %res6 = or i1 %res5, %obit6
295 %t7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add6, i64 %ext7)
296 %add7 = extractvalue {i64, i1} %t7, 0
297 %obit7 = extractvalue {i64, i1} %t7, 1
298 %res7 = or i1 %res6, %obit7
299 %t8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add7, i64 %ext8)
300 %add8 = extractvalue {i64, i1} %t8, 0
301 %obit8 = extractvalue {i64, i1} %t8, 1
302 %res8 = or i1 %res7, %obit8
303 %t9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add8, i64 %ext9)
304 %add9 = extractvalue {i64, i1} %t9, 0
305 %obit9 = extractvalue {i64, i1} %t9, 1
; Combined overflow flag of the whole chain; its consumer is not visible in
; this view.
306 %res9 = or i1 %res8, %obit9
; Intrinsic used by every test above: takes two i64 operands and returns a
; pair of the i64 result and an i1 overflow flag.
311 declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone