1 ; Test 32-bit signed subtraction with overflow detection in which the second operand is variable.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; f1: signed 32-bit subtract with overflow on two register arguments (%a - %b).
; The difference is stored through %res and the overflow bit is extracted as
; %obit. The expected code stores %r3 to 0(%r5) and builds a value in %r2 with
; an ipm/afi/risbg sequence (1342177280 == 0x50000000) -- presumably the i1
; overflow result; confirm against the SystemZ lowering.
; NOTE(review): the subtract pattern and the function's return are not visible
; in this excerpt.
8 define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, ptr %res) {
11 ; CHECK-DAG: st %r3, 0(%r5)
12 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
13 ; CHECK-DAG: afi [[REG]], 1342177280
14 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; %t is the {i32, i1} pair from the intrinsic: field 0 -> %val, field 1 -> %obit.
16 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
17 %val = extractvalue {i32, i1} %t, 0
18 %obit = extractvalue {i32, i1} %t, 1
19 store i32 %val, ptr %res
23 ; Check using the overflow result for a branch.
; f2: same register-register subtraction, but in a void function where %obit
; drives control flow instead of being produced as a value. The difference is
; still stored through %res (expected as a store of %r3 to 0(%r5)).
; NOTE(review): the %call and %exit blocks are not visible in this excerpt.
24 define void @f2(i32 %dummy, i32 %a, i32 %b, ptr %res) {
27 ; CHECK: st %r3, 0(%r5)
30 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
31 %val = extractvalue {i32, i1} %t, 0
32 %obit = extractvalue {i32, i1} %t, 1
33 store i32 %val, ptr %res
; Overflow takes the %call edge; no overflow goes to %exit.
34 br i1 %obit, label %call, label %exit
44 ; ... and the same with the inverted direction.
; f3: identical to f2 except that the branch successors are swapped, so the
; backend has to test the opposite condition.
; NOTE(review): the %call and %exit blocks are not visible in this excerpt.
45 define void @f3(i32 %dummy, i32 %a, i32 %b, ptr %res) {
48 ; CHECK: st %r3, 0(%r5)
51 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
52 %val = extractvalue {i32, i1} %t, 0
53 %obit = extractvalue {i32, i1} %t, 1
54 store i32 %val, ptr %res
; Overflow goes to %exit here; no overflow takes the %call edge.
55 br i1 %obit, label %exit, label %call
65 ; Check the low end of the S range.
; f4: the subtrahend %b is loaded directly from %src (byte offset 0), and the
; load is expected to fold into the subtract as a memory operand: s with
; displacement 0 off %r4. The overflow bit is expected to be built in %r2 by
; the same ipm/afi/risbg sequence used in the other value-returning tests.
66 define zeroext i1 @f4(i32 %dummy, i32 %a, ptr %src, ptr %res) {
68 ; CHECK: s %r3, 0(%r4)
69 ; CHECK-DAG: st %r3, 0(%r5)
70 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
71 ; CHECK-DAG: afi [[REG]], 1342177280
72 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; This load is the one expected to be folded into the subtract.
74 %b = load i32, ptr %src
75 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
76 %val = extractvalue {i32, i1} %t, 0
77 %obit = extractvalue {i32, i1} %t, 1
78 store i32 %val, ptr %res
82 ; Check the high end of the aligned S range.
; f5: the subtrahend is loaded from i32 element 1023 of %src, i.e. byte offset
; 1023 * 4 = 4092, which is expected to appear as the displacement of s.
83 define zeroext i1 @f5(i32 %dummy, i32 %a, ptr %src, ptr %res) {
85 ; CHECK: s %r3, 4092(%r4)
86 ; CHECK-DAG: st %r3, 0(%r5)
87 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
88 ; CHECK-DAG: afi [[REG]], 1342177280
89 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; Element index 1023 of i32 -> byte offset 4092.
91 %ptr = getelementptr i32, ptr %src, i64 1023
92 %b = load i32, ptr %ptr
93 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
94 %val = extractvalue {i32, i1} %t, 0
95 %obit = extractvalue {i32, i1} %t, 1
96 store i32 %val, ptr %res
100 ; Check the next word up, which should use SY instead of S.
; f6: one element past f5. Byte offset 1024 * 4 = 4096 is expected to select
; the sy form of the memory subtract rather than s.
101 define zeroext i1 @f6(i32 %dummy, i32 %a, ptr %src, ptr %res) {
103 ; CHECK: sy %r3, 4096(%r4)
104 ; CHECK-DAG: st %r3, 0(%r5)
105 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
106 ; CHECK-DAG: afi [[REG]], 1342177280
107 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; Element index 1024 of i32 -> byte offset 4096.
109 %ptr = getelementptr i32, ptr %src, i64 1024
110 %b = load i32, ptr %ptr
111 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
112 %val = extractvalue {i32, i1} %t, 0
113 %obit = extractvalue {i32, i1} %t, 1
114 store i32 %val, ptr %res
118 ; Check the high end of the aligned SY range.
; f7: the subtrahend is loaded from i32 element 131071 of %src, i.e. byte
; offset 131071 * 4 = 524284, expected as the displacement of sy.
119 define zeroext i1 @f7(i32 %dummy, i32 %a, ptr %src, ptr %res) {
121 ; CHECK: sy %r3, 524284(%r4)
122 ; CHECK-DAG: st %r3, 0(%r5)
123 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
124 ; CHECK-DAG: afi [[REG]], 1342177280
125 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; Element index 131071 of i32 -> byte offset 524284.
127 %ptr = getelementptr i32, ptr %src, i64 131071
128 %b = load i32, ptr %ptr
129 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
130 %val = extractvalue {i32, i1} %t, 0
131 %obit = extractvalue {i32, i1} %t, 1
132 store i32 %val, ptr %res
136 ; Check the next word up, which needs separate address logic.
137 ; Other sequences besides this one would be OK.
; f8: one element past f7. Byte offset 131072 * 4 = 524288 is not expected to
; be encoded as a displacement; instead the expected code first adds 524288 to
; the base register with agfi and then uses s with displacement 0.
138 define zeroext i1 @f8(i32 %dummy, i32 %a, ptr %src, ptr %res) {
140 ; CHECK: agfi %r4, 524288
141 ; CHECK: s %r3, 0(%r4)
142 ; CHECK-DAG: st %r3, 0(%r5)
143 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
144 ; CHECK-DAG: afi [[REG]], 1342177280
145 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; Element index 131072 of i32 -> byte offset 524288.
147 %ptr = getelementptr i32, ptr %src, i64 131072
148 %b = load i32, ptr %ptr
149 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
150 %val = extractvalue {i32, i1} %t, 0
151 %obit = extractvalue {i32, i1} %t, 1
152 store i32 %val, ptr %res
156 ; Check the high end of the negative aligned SY range.
; f9: the subtrahend is loaded from i32 element -1 of %src, i.e. byte offset
; -4, expected as a negative displacement of sy.
157 define zeroext i1 @f9(i32 %dummy, i32 %a, ptr %src, ptr %res) {
159 ; CHECK: sy %r3, -4(%r4)
160 ; CHECK-DAG: st %r3, 0(%r5)
161 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
162 ; CHECK-DAG: afi [[REG]], 1342177280
163 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; Element index -1 of i32 -> byte offset -4.
165 %ptr = getelementptr i32, ptr %src, i64 -1
166 %b = load i32, ptr %ptr
167 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
168 %val = extractvalue {i32, i1} %t, 0
169 %obit = extractvalue {i32, i1} %t, 1
170 store i32 %val, ptr %res
174 ; Check the low end of the SY range.
; f10: the subtrahend is loaded from i32 element -131072 of %src, i.e. byte
; offset -131072 * 4 = -524288, expected as the displacement of sy.
175 define zeroext i1 @f10(i32 %dummy, i32 %a, ptr %src, ptr %res) {
177 ; CHECK: sy %r3, -524288(%r4)
178 ; CHECK-DAG: st %r3, 0(%r5)
179 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
180 ; CHECK-DAG: afi [[REG]], 1342177280
181 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; Element index -131072 of i32 -> byte offset -524288.
183 %ptr = getelementptr i32, ptr %src, i64 -131072
184 %b = load i32, ptr %ptr
185 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
186 %val = extractvalue {i32, i1} %t, 0
187 %obit = extractvalue {i32, i1} %t, 1
188 store i32 %val, ptr %res
192 ; Check the next word down, which needs separate address logic.
193 ; Other sequences besides this one would be OK.
; f11: one element below f10. Byte offset -131073 * 4 = -524292 is not
; expected to be encoded as a displacement; instead the expected code first
; adds -524292 to the base register with agfi and then uses s with
; displacement 0.
194 define zeroext i1 @f11(i32 %dummy, i32 %a, ptr %src, ptr %res) {
196 ; CHECK: agfi %r4, -524292
197 ; CHECK: s %r3, 0(%r4)
198 ; CHECK-DAG: st %r3, 0(%r5)
199 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
200 ; CHECK-DAG: afi [[REG]], 1342177280
201 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; Element index -131073 of i32 -> byte offset -524292.
203 %ptr = getelementptr i32, ptr %src, i64 -131073
204 %b = load i32, ptr %ptr
205 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
206 %val = extractvalue {i32, i1} %t, 0
207 %obit = extractvalue {i32, i1} %t, 1
208 store i32 %val, ptr %res
212 ; Check that S allows an index.
; f12: the address is formed with integer arithmetic as %src + %index + 4092
; and converted with inttoptr. The expected code folds all three parts into
; one s memory operand: displacement 4092 plus %r2 and %r3 as base and index,
; accepted in either order. Here %a arrives as the third argument, so the
; minuend and the stored result are expected in %r4 rather than %r3.
213 define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, ptr %res) {
215 ; CHECK: s %r4, 4092({{%r3,%r2|%r2,%r3}})
216 ; CHECK-DAG: st %r4, 0(%r5)
217 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
218 ; CHECK-DAG: afi [[REG]], 1342177280
219 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; base + index, then the constant displacement.
221 %add1 = add i64 %src, %index
222 %add2 = add i64 %add1, 4092
223 %ptr = inttoptr i64 %add2 to ptr
224 %b = load i32, ptr %ptr
225 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
226 %val = extractvalue {i32, i1} %t, 0
227 %obit = extractvalue {i32, i1} %t, 1
228 store i32 %val, ptr %res
232 ; Check that SY allows an index.
; f13: same shape as f12 with a constant of 4096 instead of 4092. The expected
; code uses sy with displacement 4096 plus %r2 and %r3 as base and index,
; accepted in either order.
233 define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, ptr %res) {
235 ; CHECK: sy %r4, 4096({{%r3,%r2|%r2,%r3}})
236 ; CHECK-DAG: st %r4, 0(%r5)
237 ; CHECK-DAG: ipm [[REG:%r[0-5]]]
238 ; CHECK-DAG: afi [[REG]], 1342177280
239 ; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; base + index, then the constant displacement.
241 %add1 = add i64 %src, %index
242 %add2 = add i64 %add1, 4096
243 %ptr = inttoptr i64 %add2 to ptr
244 %b = load i32, ptr %ptr
245 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
246 %val = extractvalue {i32, i1} %t, 0
247 %obit = extractvalue {i32, i1} %t, 1
248 store i32 %val, ptr %res
252 ; Check that subtractions of spilled values can use S rather than SR.
; f14: loads ten i32 values from %ptr0 (elements 0, 2, 4, ..., 18) before
; calling @foo, so all ten are live across the call. After the call, each
; value is subtracted in turn from a running result that starts at @foo's
; return value, and the ten overflow bits are OR-ed together. The expected
; code contains, after the call to foo, an s whose memory operand is a stack
; slot at 160(%r15) or 164(%r15), i.e. a subtract straight from a spill slot.
; NOTE(review): the declaration of @foo and this function's return are not
; visible in this excerpt.
253 define zeroext i1 @f14(ptr %ptr0) {
255 ; CHECK: brasl %r14, foo@PLT
256 ; CHECK: s %r2, 16{{[04]}}(%r15)
; Addresses of every second i32 element after %ptr0.
258 %ptr1 = getelementptr i32, ptr %ptr0, i64 2
259 %ptr2 = getelementptr i32, ptr %ptr0, i64 4
260 %ptr3 = getelementptr i32, ptr %ptr0, i64 6
261 %ptr4 = getelementptr i32, ptr %ptr0, i64 8
262 %ptr5 = getelementptr i32, ptr %ptr0, i64 10
263 %ptr6 = getelementptr i32, ptr %ptr0, i64 12
264 %ptr7 = getelementptr i32, ptr %ptr0, i64 14
265 %ptr8 = getelementptr i32, ptr %ptr0, i64 16
266 %ptr9 = getelementptr i32, ptr %ptr0, i64 18
; All ten loads happen before the call, so their values must survive it.
268 %val0 = load i32, ptr %ptr0
269 %val1 = load i32, ptr %ptr1
270 %val2 = load i32, ptr %ptr2
271 %val3 = load i32, ptr %ptr3
272 %val4 = load i32, ptr %ptr4
273 %val5 = load i32, ptr %ptr5
274 %val6 = load i32, ptr %ptr6
275 %val7 = load i32, ptr %ptr7
276 %val8 = load i32, ptr %ptr8
277 %val9 = load i32, ptr %ptr9
279 %ret = call i32 @foo()
; Chain of ten overflow-checked subtractions. Despite the %addN names, each
; %addN holds the running difference; %resN accumulates the OR of the
; overflow bits seen so far.
281 %t0 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %ret, i32 %val0)
282 %add0 = extractvalue {i32, i1} %t0, 0
283 %obit0 = extractvalue {i32, i1} %t0, 1
284 %t1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add0, i32 %val1)
285 %add1 = extractvalue {i32, i1} %t1, 0
286 %obit1 = extractvalue {i32, i1} %t1, 1
287 %res1 = or i1 %obit0, %obit1
288 %t2 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add1, i32 %val2)
289 %add2 = extractvalue {i32, i1} %t2, 0
290 %obit2 = extractvalue {i32, i1} %t2, 1
291 %res2 = or i1 %res1, %obit2
292 %t3 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add2, i32 %val3)
293 %add3 = extractvalue {i32, i1} %t3, 0
294 %obit3 = extractvalue {i32, i1} %t3, 1
295 %res3 = or i1 %res2, %obit3
296 %t4 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add3, i32 %val4)
297 %add4 = extractvalue {i32, i1} %t4, 0
298 %obit4 = extractvalue {i32, i1} %t4, 1
299 %res4 = or i1 %res3, %obit4
300 %t5 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add4, i32 %val5)
301 %add5 = extractvalue {i32, i1} %t5, 0
302 %obit5 = extractvalue {i32, i1} %t5, 1
303 %res5 = or i1 %res4, %obit5
304 %t6 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add5, i32 %val6)
305 %add6 = extractvalue {i32, i1} %t6, 0
306 %obit6 = extractvalue {i32, i1} %t6, 1
307 %res6 = or i1 %res5, %obit6
308 %t7 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add6, i32 %val7)
309 %add7 = extractvalue {i32, i1} %t7, 0
310 %obit7 = extractvalue {i32, i1} %t7, 1
311 %res7 = or i1 %res6, %obit7
312 %t8 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add7, i32 %val8)
313 %add8 = extractvalue {i32, i1} %t8, 0
314 %obit8 = extractvalue {i32, i1} %t8, 1
315 %res8 = or i1 %res7, %obit8
316 %t9 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add8, i32 %val9)
317 %add9 = extractvalue {i32, i1} %t9, 0
318 %obit9 = extractvalue {i32, i1} %t9, 1
319 %res9 = or i1 %res8, %obit9
; Declaration of the overflow-checked signed subtraction intrinsic called by
; every test function in this file. It takes two i32 operands and returns an
; {i32, i1} pair; the tests read field 0 as the difference and field 1 as the
; overflow bit.
324 declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone