3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-block-placement | FileCheck %s
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-block-placement \
5 ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-Z13
7 ; Test that strength reduction is applied to addresses with a scale factor,
8 ; but that indexed addressing can still be used.
; The loop stores %a into consecutive i32 elements of %dest. The expected
; store of %r3 uses offset 400 together with two address registers.
9 define void @f1(ptr %dest, i32 %a) {
12 ; CHECK: st %r3, 400({{%r[1-5],%r[1-5]}})
18 %index = phi i64 [ 0, %entry ], [ %next, %loop ] ; element index, 0 on entry
19 %ptr = getelementptr i32, ptr %dest, i64 %index ; address of dest[index]
20 store i32 %a, ptr %ptr
21 %next = add i64 %index, 1
22 %cmp = icmp ne i64 %next, 100 ; keep looping until the index reaches 100
23 br i1 %cmp, label %loop, label %exit
29 ; Test a loop that should be converted into dbr form and then use BRCT.
; The IR counts a 32-bit value up from 0 to 100; the expected output instead
; loads 100 into a register and branches back to the loop label with brct on
; that same register.
30 define void @f2(ptr %src, ptr %dest) {
32 ; CHECK: lhi [[REG:%r[0-5]]], 100
33 ; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
34 ; CHECK: brct [[REG]], [[LABEL]]
40 %count = phi i32 [ 0, %entry ], [ %next, %loop.next ] ; up-counter, 0 on entry
41 %next = add i32 %count, 1
42 %val = load volatile i32, ptr %src
43 %cmp = icmp eq i32 %val, 0
44 br i1 %cmp, label %loop.next, label %loop.store ; skip the store when %val is 0
47 %add = add i32 %val, 1
48 store volatile i32 %add, ptr %dest
52 %cont = icmp ne i32 %next, 100 ; continue until %next reaches 100
53 br i1 %cont, label %loop, label %exit
59 ; Like f2, but for BRCTG.
; Same loop shape as f2 with a 64-bit counter and 64-bit volatile accesses;
; the expected output loads 100 with lghi and branches back with brctg.
60 define void @f3(ptr %src, ptr %dest) {
62 ; CHECK: lghi [[REG:%r[0-5]]], 100
63 ; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
64 ; CHECK: brctg [[REG]], [[LABEL]]
70 %count = phi i64 [ 0, %entry ], [ %next, %loop.next ] ; up-counter, 0 on entry
71 %next = add i64 %count, 1
72 %val = load volatile i64, ptr %src
73 %cmp = icmp eq i64 %val, 0
74 br i1 %cmp, label %loop.next, label %loop.store ; skip the store when %val is 0
77 %add = add i64 %val, 1
78 store volatile i64 %add, ptr %dest
82 %cont = icmp ne i64 %next, 100 ; continue until %next reaches 100
83 br i1 %cont, label %loop, label %exit
89 ; Test a loop with a 64-bit decremented counter in which the 32-bit
90 ; low part of the counter is used after the decrement. This is an example
91 ; of a subregister use being the only thing that blocks a conversion to BRCTG.
; The expected output decrements the counter with an explicit aghi, copies it
; with lr into a second register, and stores that second register with stg.
92 define void @f4(ptr %src, ptr %dest, ptr %dest2, i64 %count) {
94 ; CHECK: aghi [[REG:%r[0-5]]], -1
95 ; CHECK: lr [[REG2:%r[0-5]]], [[REG]]
96 ; CHECK: stg [[REG2]],
103 %left = phi i64 [ %count, %entry ], [ %next, %loop.next ] ; remaining iterations, starts at %count
104 store volatile i64 %left, ptr %dest2
105 %val = load volatile i32, ptr %src
106 %cmp = icmp eq i32 %val, 0
107 br i1 %cmp, label %loop.next, label %loop.store ; skip the store when %val is 0
110 %add = add i32 %val, 1
111 store volatile i32 %add, ptr %dest
115 %next = add i64 %left, -1 ; the 64-bit decrement
116 %ext = zext i32 %val to i64
117 %shl = shl i64 %ext, 32 ; %val moved into the high 32 bits
118 %and = and i64 %next, 4294967295 ; low 32 bits of the decremented counter
119 %or = or i64 %shl, %and ; high half from %val, low half from %next
120 store volatile i64 %or, ptr %dest2
121 %cont = icmp ne i64 %next, 0 ; loop until the counter reaches zero
122 br i1 %cont, label %loop, label %exit
128 ; Test that negative offsets are avoided for loads of floating point.
; Each iteration sums the three float fields of one %s.float element of %b and
; stores the result into field 0 of the matching element of %a. The z13-only
; pattern after the f5 label rejects any "-<digits>(%r" operand in the output.
; NOTE(review): %S is not declared in the lines visible here; presumably it is
; a parameter on a line outside this view - confirm.
129 %s.float = type { float, float, float }
130 define void @f5(ptr nocapture %a,
131 ptr nocapture readonly %b,
133 ; CHECK-Z13-LABEL: f5:
134 ; CHECK-Z13-NOT: -{{[0-9]+}}(%r
137 %cmp9 = icmp eq i32 %S, 0 ; guard: bypass the loop when %S is 0
138 br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader
140 for.body.preheader: ; preds = %entry
143 for.cond.cleanup.loopexit: ; preds = %for.body
144 br label %for.cond.cleanup
146 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
149 for.body: ; preds = %for.body.preheader, %for.body
150 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
151 %a1 = getelementptr inbounds %s.float, ptr %b, i64 %indvars.iv, i32 0 ; &b[i] field 0
152 %tmp = load float, ptr %a1, align 4
153 %b4 = getelementptr inbounds %s.float, ptr %b, i64 %indvars.iv, i32 1 ; &b[i] field 1
154 %tmp1 = load float, ptr %b4, align 4
155 %add = fadd float %tmp, %tmp1
156 %c = getelementptr inbounds %s.float, ptr %b, i64 %indvars.iv, i32 2 ; &b[i] field 2
157 %tmp2 = load float, ptr %c, align 4
158 %add7 = fadd float %add, %tmp2 ; sum of the three fields
159 %a10 = getelementptr inbounds %s.float, ptr %a, i64 %indvars.iv, i32 0 ; &a[i] field 0
160 store float %add7, ptr %a10, align 4
161 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
162 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; compare in 32 bits against %S
163 %exitcond = icmp eq i32 %lftr.wideiv, %S
164 br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
167 ; Test that negative offsets are avoided for loads of double.
; Same loop as f5 with double fields: the three fields of one %s.double element
; of %b are summed and stored into field 0 of the matching element of %a. The
; z13-only pattern after the f6 label rejects any "-<digits>(%r" operand.
; NOTE(review): %S is not declared in the lines visible here; presumably it is
; a parameter on a line outside this view - confirm.
168 %s.double = type { double, double, double }
169 define void @f6(ptr nocapture %a,
170 ptr nocapture readonly %b,
172 ; CHECK-Z13-LABEL: f6:
173 ; CHECK-Z13-NOT: -{{[0-9]+}}(%r
175 %cmp9 = icmp eq i32 %S, 0 ; guard: bypass the loop when %S is 0
176 br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader
178 for.body.preheader: ; preds = %entry
181 for.cond.cleanup.loopexit: ; preds = %for.body
182 br label %for.cond.cleanup
184 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
187 for.body: ; preds = %for.body.preheader, %for.body
188 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
189 %a1 = getelementptr inbounds %s.double, ptr %b, i64 %indvars.iv, i32 0 ; &b[i] field 0
190 %tmp = load double, ptr %a1, align 4
191 %b4 = getelementptr inbounds %s.double, ptr %b, i64 %indvars.iv, i32 1 ; &b[i] field 1
192 %tmp1 = load double, ptr %b4, align 4
193 %add = fadd double %tmp, %tmp1
194 %c = getelementptr inbounds %s.double, ptr %b, i64 %indvars.iv, i32 2 ; &b[i] field 2
195 %tmp2 = load double, ptr %c, align 4
196 %add7 = fadd double %add, %tmp2 ; sum of the three fields
197 %a10 = getelementptr inbounds %s.double, ptr %a, i64 %indvars.iv, i32 0 ; &a[i] field 0
198 store double %add7, ptr %a10, align 4
199 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
200 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; compare in 32 bits against %S
201 %exitcond = icmp eq i32 %lftr.wideiv, %S
202 br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
205 ; Test that negative offsets are avoided for memory accesses of vector type.
; Same loop as f5 with <4 x i32> fields and integer vector adds: the three
; fields of one %s.vec element of %b are summed and stored into field 0 of the
; matching element of %a. The z13-only pattern after the f7 label rejects any
; "-<digits>(%r" operand.
; NOTE(review): %S is not declared in the lines visible here; presumably it is
; a parameter on a line outside this view - confirm.
206 %s.vec = type { <4 x i32>, <4 x i32>, <4 x i32> }
207 define void @f7(ptr nocapture %a,
208 ptr nocapture readonly %b,
210 ; CHECK-Z13-LABEL: f7:
211 ; CHECK-Z13-NOT: -{{[0-9]+}}(%r
213 %cmp9 = icmp eq i32 %S, 0 ; guard: bypass the loop when %S is 0
214 br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader
216 for.body.preheader: ; preds = %entry
219 for.cond.cleanup.loopexit: ; preds = %for.body
220 br label %for.cond.cleanup
222 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
225 for.body: ; preds = %for.body.preheader, %for.body
226 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
227 %a1 = getelementptr inbounds %s.vec, ptr %b, i64 %indvars.iv, i32 0 ; &b[i] field 0
228 %tmp = load <4 x i32>, ptr %a1, align 4
229 %b4 = getelementptr inbounds %s.vec, ptr %b, i64 %indvars.iv, i32 1 ; &b[i] field 1
230 %tmp1 = load <4 x i32>, ptr %b4, align 4
231 %add = add <4 x i32> %tmp1, %tmp
232 %c = getelementptr inbounds %s.vec, ptr %b, i64 %indvars.iv, i32 2 ; &b[i] field 2
233 %tmp2 = load <4 x i32>, ptr %c, align 4
234 %add7 = add <4 x i32> %add, %tmp2 ; sum of the three fields
235 %a10 = getelementptr inbounds %s.vec, ptr %a, i64 %indvars.iv, i32 0 ; &a[i] field 0
236 store <4 x i32> %add7, ptr %a10, align 4
237 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
238 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; compare in 32 bits against %S
239 %exitcond = icmp eq i32 %lftr.wideiv, %S
240 br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
243 ; Test that a memcpy loop does not need many LAY instructions before each mvc (D12 and no index-reg).
; The loop body issues four 24-byte memcpy calls at element offsets -1 to -4
; from two pointers that both step back by four %0 elements per iteration. The
; z13-only checks visible here require three consecutive mvc lines.
; NOTE(review): the define line for f8 is not among the lines visible here.
244 %0 = type { %1, ptr }
245 %1 = type { ptr, ptr }
246 %2 = type <{ %3, i32, [4 x i8] }>
247 %3 = type { ptr, ptr, ptr }
249 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #0
252 ; CHECK-Z13-LABEL: f8:
254 ; CHECK-Z13-NEXT: mvc
255 ; CHECK-Z13-NEXT: mvc
256 ; CHECK-Z13-NEXT: mvc
259 %tmp = load ptr, ptr undef, align 8 ; load address is undef
260 br i1 undef, label %bb2, label %bb1 ; branch condition is undef
265 bb2: ; preds = %bb1, %bb
266 %tmp3 = phi ptr [ %tmp, %bb ], [ undef, %bb1 ]
267 %tmp4 = phi ptr [ undef, %bb ], [ undef, %bb1 ]
270 bb5: ; preds = %bb5, %bb2
271 %tmp6 = phi ptr [ %tmp21, %bb5 ], [ %tmp3, %bb2 ] ; destination cursor
272 %tmp7 = phi ptr [ %tmp20, %bb5 ], [ %tmp4, %bb2 ] ; source cursor
273 %tmp8 = getelementptr inbounds %0, ptr %tmp7, i64 -1
274 %tmp9 = getelementptr inbounds %0, ptr %tmp6, i64 -1
275 tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp9, ptr align 8 %tmp8, i64 24, i1 false)
276 %tmp12 = getelementptr inbounds %0, ptr %tmp7, i64 -2
277 %tmp13 = getelementptr inbounds %0, ptr %tmp6, i64 -2
278 tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp13, ptr align 8 %tmp12, i64 24, i1 false)
279 %tmp16 = getelementptr inbounds %0, ptr %tmp7, i64 -3
280 %tmp17 = getelementptr inbounds %0, ptr %tmp6, i64 -3
281 tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp17, ptr align 8 %tmp16, i64 24, i1 false)
282 %tmp20 = getelementptr inbounds %0, ptr %tmp7, i64 -4 ; next iteration's source cursor
283 %tmp21 = getelementptr inbounds %0, ptr %tmp6, i64 -4 ; next iteration's destination cursor
284 tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp21, ptr align 8 %tmp20, i64 24, i1 false)
288 ; Test that a chsi does not need an aghik inside the loop (no index reg).
; The loop loads an i32 at the current index and at (index | 1), compares the
; first with 0, and advances the index by 4. The z13-only checks reject any
; aghik after the inner-loop header comment in the output.
; NOTE(review): the define line for f9 is not among the lines visible here.
290 ; CHECK-Z13-LABEL: f9:
291 ; CHECK-Z13: # =>This Inner Loop Header: Depth=1
292 ; CHECK-Z13-NOT: aghik
296 br label %for.body.i63
298 for.body.i63: ; preds = %for.inc.i, %entry
299 %indvars.iv155.i = phi i64 [ 0, %entry ], [ %indvars.iv.next156.i.3, %for.inc.i ] ; index, 0 on entry
300 %arrayidx.i62 = getelementptr inbounds i32, ptr undef, i64 %indvars.iv155.i ; base pointer is undef
301 %tmp = load i32, ptr %arrayidx.i62, align 4
302 %cmp9.i = icmp eq i32 %tmp, 0
303 br i1 %cmp9.i, label %for.inc.i, label %if.then10.i
305 if.then10.i: ; preds = %for.body.i63
308 for.inc.i: ; preds = %for.body.i63
309 %indvars.iv.next156.i = or i64 %indvars.iv155.i, 1 ; index with bit 0 set
310 %arrayidx.i62.1 = getelementptr inbounds i32, ptr undef, i64 %indvars.iv.next156.i
311 %tmp1 = load i32, ptr %arrayidx.i62.1, align 4 ; result is not used in the visible lines
312 %indvars.iv.next156.i.3 = add nsw i64 %indvars.iv155.i, 4 ; advance the index by 4
313 br label %for.body.i63