1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -arm-memtransfer-tploop=force-enabled %s -o - | FileCheck %s
4 ; In this test, the successors of various blocks were becoming invalid after
5 ; ifcvt as the blocks did not properly fall through to the successor after a
; External 20 x 23 x 19 byte array; every memset call in @a writes zeroes into
; its [0][0][...] rows.
8 @arr_183 = external dso_local local_unnamed_addr global [20 x [23 x [19 x i8]]], align 1
; @a: input function for the RUN line above. Depending on %b it enters one of
; two loops (%for.body.us or %for.cond). Each loop contains four zeroing
; memsets into @arr_183 whose length is 108 - x, where x is either 0 or a
; zero-extended byte loaded through %c. The CHECK lines expect every one of
; those memsets to be emitted as a vstrb.8 loop that is set up by wlstp.8 or
; dlstp.8 and closed by letp.
9 define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly %d) {
11 ; CHECK: @ %bb.0: @ %entry
12 ; CHECK-NEXT: .save {r4, r5, r7, lr}
13 ; CHECK-NEXT: push {r4, r5, r7, lr}
14 ; CHECK-NEXT: cmp r0, #2
15 ; CHECK-NEXT: bls.w .LBB0_12
16 ; CHECK-NEXT: @ %bb.1: @ %for.body.us.preheader
17 ; CHECK-NEXT: movw r5, :lower16:arr_183
18 ; CHECK-NEXT: movs r3, #0
19 ; CHECK-NEXT: movt r5, :upper16:arr_183
20 ; CHECK-NEXT: mov.w r12, #19
21 ; CHECK-NEXT: vmov.i32 q0, #0x0
22 ; CHECK-NEXT: vmov.i32 q1, #0x0
23 ; CHECK-NEXT: vmov.i32 q2, #0x0
24 ; CHECK-NEXT: vmov.i32 q3, #0x0
25 ; CHECK-NEXT: b .LBB0_3
26 ; CHECK-NEXT: .LBB0_2: @ %land.end.us.3
27 ; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
28 ; CHECK-NEXT: movs r3, #1
29 ; CHECK-NEXT: .LBB0_3: @ %for.body.us
30 ; CHECK-NEXT: @ =>This Loop Header: Depth=1
31 ; CHECK-NEXT: @ Child Loop BB0_4 Depth 2
32 ; CHECK-NEXT: @ Child Loop BB0_6 Depth 2
33 ; CHECK-NEXT: @ Child Loop BB0_8 Depth 2
34 ; CHECK-NEXT: @ Child Loop BB0_11 Depth 2
35 ; CHECK-NEXT: ldr.w r0, [r2, r3, lsl #2]
36 ; CHECK-NEXT: cmp r0, #0
38 ; CHECK-NEXT: ldrbne r0, [r1, r3]
39 ; CHECK-NEXT: moveq r0, #0
40 ; CHECK-NEXT: mla r3, r3, r12, r5
41 ; CHECK-NEXT: add r3, r0
42 ; CHECK-NEXT: rsb.w r0, r0, #108
43 ; CHECK-NEXT: wlstp.8 lr, r0, .LBB0_5
44 ; CHECK-NEXT: .LBB0_4: @ Parent Loop BB0_3 Depth=1
45 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
46 ; CHECK-NEXT: vstrb.8 q0, [r3], #16
47 ; CHECK-NEXT: letp lr, .LBB0_4
48 ; CHECK-NEXT: .LBB0_5: @ %land.end.us
49 ; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
50 ; CHECK-NEXT: ldr r0, [r2, #4]
51 ; CHECK-NEXT: cmp r0, #0
53 ; CHECK-NEXT: ldrbne r0, [r1, #1]
54 ; CHECK-NEXT: moveq r0, #0
55 ; CHECK-NEXT: adds r3, r5, r0
56 ; CHECK-NEXT: rsb.w r0, r0, #108
57 ; CHECK-NEXT: adds r3, #19
58 ; CHECK-NEXT: wlstp.8 lr, r0, .LBB0_7
59 ; CHECK-NEXT: .LBB0_6: @ Parent Loop BB0_3 Depth=1
60 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
61 ; CHECK-NEXT: vstrb.8 q1, [r3], #16
62 ; CHECK-NEXT: letp lr, .LBB0_6
63 ; CHECK-NEXT: .LBB0_7: @ %land.end.us.1
64 ; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
65 ; CHECK-NEXT: ldr r0, [r2, #4]
66 ; CHECK-NEXT: cmp r0, #0
68 ; CHECK-NEXT: ldrbne r0, [r1, #1]
69 ; CHECK-NEXT: moveq r0, #0
70 ; CHECK-NEXT: adds r3, r5, r0
71 ; CHECK-NEXT: rsb.w r0, r0, #108
72 ; CHECK-NEXT: adds r3, #19
73 ; CHECK-NEXT: wlstp.8 lr, r0, .LBB0_9
74 ; CHECK-NEXT: .LBB0_8: @ Parent Loop BB0_3 Depth=1
75 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
76 ; CHECK-NEXT: vstrb.8 q2, [r3], #16
77 ; CHECK-NEXT: letp lr, .LBB0_8
78 ; CHECK-NEXT: .LBB0_9: @ %land.end.us.2
79 ; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
80 ; CHECK-NEXT: ldr r0, [r2, #4]
81 ; CHECK-NEXT: cmp r0, #0
83 ; CHECK-NEXT: ldrbne r0, [r1, #1]
84 ; CHECK-NEXT: moveq r0, #0
85 ; CHECK-NEXT: adds r3, r5, r0
86 ; CHECK-NEXT: rsb.w r0, r0, #108
87 ; CHECK-NEXT: add.w r4, r0, #15
88 ; CHECK-NEXT: adds r3, #19
89 ; CHECK-NEXT: lsrs r4, r4, #4
90 ; CHECK-NEXT: cmp.w r4, #0
91 ; CHECK-NEXT: beq .LBB0_2
92 ; CHECK-NEXT: @ %bb.10: @ %land.end.us.2
93 ; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
94 ; CHECK-NEXT: dlstp.8 lr, r0
95 ; CHECK-NEXT: .LBB0_11: @ Parent Loop BB0_3 Depth=1
96 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
97 ; CHECK-NEXT: vstrb.8 q3, [r3], #16
98 ; CHECK-NEXT: letp lr, .LBB0_11
99 ; CHECK-NEXT: b .LBB0_2
100 ; CHECK-NEXT: .LBB0_12:
101 ; CHECK-NEXT: movw r12, :lower16:arr_183
102 ; CHECK-NEXT: vmov.i32 q0, #0x0
103 ; CHECK-NEXT: movt r12, :upper16:arr_183
104 ; CHECK-NEXT: vmov.i32 q1, #0x0
105 ; CHECK-NEXT: vmov.i32 q2, #0x0
106 ; CHECK-NEXT: vmov.i32 q3, #0x0
107 ; CHECK-NEXT: b .LBB0_14
108 ; CHECK-NEXT: .LBB0_13: @ %for.body.lr.ph.3
109 ; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1
110 ; CHECK-NEXT: ldr r3, [r2, #4]
111 ; CHECK-NEXT: cmp r3, #0
113 ; CHECK-NEXT: ldrbne r3, [r1, #1]
114 ; CHECK-NEXT: moveq r3, #0
115 ; CHECK-NEXT: add.w r5, r12, r3
116 ; CHECK-NEXT: rsb.w r3, r3, #108
117 ; CHECK-NEXT: add.w r4, r5, #19
118 ; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_14
119 ; CHECK-NEXT: b .LBB0_24
120 ; CHECK-NEXT: .LBB0_14: @ %for.cond
121 ; CHECK-NEXT: @ =>This Loop Header: Depth=1
122 ; CHECK-NEXT: @ Child Loop BB0_16 Depth 2
123 ; CHECK-NEXT: @ Child Loop BB0_19 Depth 2
124 ; CHECK-NEXT: @ Child Loop BB0_22 Depth 2
125 ; CHECK-NEXT: @ Child Loop BB0_24 Depth 2
126 ; CHECK-NEXT: cmp r0, #2
127 ; CHECK-NEXT: blo .LBB0_17
128 ; CHECK-NEXT: @ %bb.15: @ %for.body.lr.ph
129 ; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1
130 ; CHECK-NEXT: ldr r3, [r2, #4]
131 ; CHECK-NEXT: cmp r3, #0
133 ; CHECK-NEXT: ldrbne r3, [r1, #1]
134 ; CHECK-NEXT: moveq r3, #0
135 ; CHECK-NEXT: add.w r5, r12, r3
136 ; CHECK-NEXT: rsb.w r3, r3, #108
137 ; CHECK-NEXT: add.w r4, r5, #19
138 ; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_17
139 ; CHECK-NEXT: .LBB0_16: @ Parent Loop BB0_14 Depth=1
140 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
141 ; CHECK-NEXT: vstrb.8 q0, [r4], #16
142 ; CHECK-NEXT: letp lr, .LBB0_16
143 ; CHECK-NEXT: .LBB0_17: @ %for.cond.backedge
144 ; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1
145 ; CHECK-NEXT: cmp r0, #2
146 ; CHECK-NEXT: blo .LBB0_20
147 ; CHECK-NEXT: @ %bb.18: @ %for.body.lr.ph.1
148 ; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1
149 ; CHECK-NEXT: ldr r3, [r2, #4]
150 ; CHECK-NEXT: cmp r3, #0
152 ; CHECK-NEXT: ldrbne r3, [r1, #1]
153 ; CHECK-NEXT: moveq r3, #0
154 ; CHECK-NEXT: add.w r5, r12, r3
155 ; CHECK-NEXT: rsb.w r3, r3, #108
156 ; CHECK-NEXT: add.w r4, r5, #19
157 ; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_20
158 ; CHECK-NEXT: .LBB0_19: @ Parent Loop BB0_14 Depth=1
159 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
160 ; CHECK-NEXT: vstrb.8 q1, [r4], #16
161 ; CHECK-NEXT: letp lr, .LBB0_19
162 ; CHECK-NEXT: .LBB0_20: @ %for.cond.backedge.1
163 ; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1
164 ; CHECK-NEXT: cmp r0, #2
165 ; CHECK-NEXT: blo .LBB0_23
166 ; CHECK-NEXT: @ %bb.21: @ %for.body.lr.ph.2
167 ; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1
168 ; CHECK-NEXT: ldr r3, [r2, #4]
169 ; CHECK-NEXT: cmp r3, #0
171 ; CHECK-NEXT: ldrbne r3, [r1, #1]
172 ; CHECK-NEXT: moveq r3, #0
173 ; CHECK-NEXT: add.w r5, r12, r3
174 ; CHECK-NEXT: rsb.w r3, r3, #108
175 ; CHECK-NEXT: add.w r4, r5, #19
176 ; CHECK-NEXT: wlstp.8 lr, r3, .LBB0_23
177 ; CHECK-NEXT: .LBB0_22: @ Parent Loop BB0_14 Depth=1
178 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
179 ; CHECK-NEXT: vstrb.8 q2, [r4], #16
180 ; CHECK-NEXT: letp lr, .LBB0_22
181 ; CHECK-NEXT: .LBB0_23: @ %for.cond.backedge.2
182 ; CHECK-NEXT: @ in Loop: Header=BB0_14 Depth=1
183 ; CHECK-NEXT: cmp r0, #2
184 ; CHECK-NEXT: blo .LBB0_14
185 ; CHECK-NEXT: b .LBB0_13
186 ; CHECK-NEXT: .LBB0_24: @ Parent Loop BB0_14 Depth=1
187 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
188 ; CHECK-NEXT: vstrb.8 q3, [r4], #16
189 ; CHECK-NEXT: letp lr, .LBB0_24
190 ; CHECK-NEXT: b .LBB0_14
; Dispatch on the argument: %b > 2 enters the %for.body.us loop, anything else
; goes to the %for.cond loop via its preheader.
192 %cmp = icmp ugt i8 %b, 2 ; avoid following BB optimizing away through the domination
193 br i1 %cmp, label %for.body.us.preheader, label %for.cond.preheader
; Values computed once ahead of the %for.cond loop: four identical copies of
; the %b > 1 test and of the addresses of element 1 of %d and of %c, one copy
; per step of the loop body.
195 for.cond.preheader: ; preds = %entry
196 %cmp43 = icmp ugt i8 %b, 1
197 %arrayidx6 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
198 %arrayidx12 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
199 %cmp43.1 = icmp ugt i8 %b, 1
200 %arrayidx6.1 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
201 %arrayidx12.1 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
202 %cmp43.2 = icmp ugt i8 %b, 1
203 %arrayidx6.2 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
204 %arrayidx12.2 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
205 %cmp43.3 = icmp ugt i8 %b, 1
206 %arrayidx6.3 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
207 %arrayidx12.3 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
; Addresses of element 1 of %d and of %c, used by steps .us.1, .us.2 and .us.3
; of the %for.body.us loop.
210 for.body.us.preheader: ; preds = %entry
211 %arrayidx6.us.1 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
212 %arrayidx12.us.1 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
213 %arrayidx6.us.2 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
214 %arrayidx12.us.2 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
215 %arrayidx6.us.3 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
216 %arrayidx12.us.3 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
217 br label %for.body.us
; Step 0 of the %for.cond loop: when %cmp43 is false the memset is skipped and
; control goes straight to %for.cond.backedge.
219 for.cond: ; preds = %for.cond.backedge.3, %for.cond.preheader
220 br i1 %cmp43, label %for.body.lr.ph, label %for.cond.backedge
; Load element 1 of %d; a zero value bypasses the byte load in %land.rhs.
222 for.body.lr.ph: ; preds = %for.cond
223 %0 = load i32, ptr %arrayidx6, align 4
224 %tobool7.not = icmp eq i32 %0, 0
225 br i1 %tobool7.not, label %land.end, label %land.rhs
; Header of the %for.body.us loop. %conv44.us is 0 on entry from the preheader
; and 1 whenever the block is reached from %land.end.us.3; it indexes %d here
; and %c / @arr_183 in the blocks that follow.
227 for.body.us: ; preds = %land.end.us.3, %for.body.us.preheader
228 %conv44.us = phi i32 [ 0, %for.body.us.preheader ], [ 1, %land.end.us.3 ]
229 %arrayidx6.us = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 %conv44.us
230 %1 = load i32, ptr %arrayidx6.us, align 4
231 %tobool7.not.us = icmp eq i32 %1, 0
232 br i1 %tobool7.not.us, label %land.end.us, label %land.rhs.us
234 land.rhs.us: ; preds = %for.body.us
235 %arrayidx12.us = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 %conv44.us
236 %2 = load i8, ptr %arrayidx12.us, align 1
237 %tobool13.us = zext i8 %2 to i32
238 br label %land.end.us
; %3 is 0 when the loaded %d element was zero, otherwise the zero-extended byte
; from %c. Zero 108 - %3 bytes starting at @arr_183[0][0][%conv44.us][%3], then
; start step 1 by testing element 1 of %d.
240 land.end.us: ; preds = %land.rhs.us, %for.body.us
241 %3 = phi i32 [ 0, %for.body.us ], [ %tobool13.us, %land.rhs.us ]
242 %scevgep45 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 %conv44.us, i32 %3
243 %4 = sub nuw nsw i32 108, %3
244 call void @llvm.memset.p0.i32(ptr align 1 %scevgep45, i8 0, i32 %4, i1 false)
245 %5 = load i32, ptr %arrayidx6.us.1, align 4
246 %tobool7.not.us.1 = icmp eq i32 %5, 0
247 br i1 %tobool7.not.us.1, label %land.end.us.1, label %land.rhs.us.1
249 land.rhs: ; preds = %for.body.lr.ph
250 %6 = load i8, ptr %arrayidx12, align 1
251 %tobool13 = zext i8 %6 to i32
; Same memset pattern with a fixed row index of 1: zero 108 - %7 bytes starting
; at @arr_183[0][0][1][%7]. Steps .1, .2 and .3 below repeat this shape.
254 land.end: ; preds = %land.rhs, %for.body.lr.ph
255 %7 = phi i32 [ 0, %for.body.lr.ph ], [ %tobool13, %land.rhs ]
256 %scevgep = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %7
257 %8 = sub nuw nsw i32 108, %7
258 call void @llvm.memset.p0.i32(ptr align 1 %scevgep, i8 0, i32 %8, i1 false)
259 br label %for.cond.backedge
; Step 1 of the %for.cond loop, guarded by %cmp43.1.
261 for.cond.backedge: ; preds = %land.end, %for.cond
262 br i1 %cmp43.1, label %for.body.lr.ph.1, label %for.cond.backedge.1
264 for.body.lr.ph.1: ; preds = %for.cond.backedge
265 %9 = load i32, ptr %arrayidx6.1, align 4
266 %tobool7.not.1 = icmp eq i32 %9, 0
267 br i1 %tobool7.not.1, label %land.end.1, label %land.rhs.1
269 land.rhs.1: ; preds = %for.body.lr.ph.1
270 %10 = load i8, ptr %arrayidx12.1, align 1
271 %tobool13.1 = zext i8 %10 to i32
274 land.end.1: ; preds = %land.rhs.1, %for.body.lr.ph.1
275 %11 = phi i32 [ 0, %for.body.lr.ph.1 ], [ %tobool13.1, %land.rhs.1 ]
276 %scevgep.1 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %11
277 %12 = sub nuw nsw i32 108, %11
278 call void @llvm.memset.p0.i32(ptr align 1 %scevgep.1, i8 0, i32 %12, i1 false)
279 br label %for.cond.backedge.1
; Step 2 of the %for.cond loop, guarded by %cmp43.2.
281 for.cond.backedge.1: ; preds = %land.end.1, %for.cond.backedge
282 br i1 %cmp43.2, label %for.body.lr.ph.2, label %for.cond.backedge.2
284 for.body.lr.ph.2: ; preds = %for.cond.backedge.1
285 %13 = load i32, ptr %arrayidx6.2, align 4
286 %tobool7.not.2 = icmp eq i32 %13, 0
287 br i1 %tobool7.not.2, label %land.end.2, label %land.rhs.2
289 land.rhs.2: ; preds = %for.body.lr.ph.2
290 %14 = load i8, ptr %arrayidx12.2, align 1
291 %tobool13.2 = zext i8 %14 to i32
294 land.end.2: ; preds = %land.rhs.2, %for.body.lr.ph.2
295 %15 = phi i32 [ 0, %for.body.lr.ph.2 ], [ %tobool13.2, %land.rhs.2 ]
296 %scevgep.2 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %15
297 %16 = sub nuw nsw i32 108, %15
298 call void @llvm.memset.p0.i32(ptr align 1 %scevgep.2, i8 0, i32 %16, i1 false)
299 br label %for.cond.backedge.2
; Step 3 of the %for.cond loop, guarded by %cmp43.3.
301 for.cond.backedge.2: ; preds = %land.end.2, %for.cond.backedge.1
302 br i1 %cmp43.3, label %for.body.lr.ph.3, label %for.cond.backedge.3
304 for.body.lr.ph.3: ; preds = %for.cond.backedge.2
305 %17 = load i32, ptr %arrayidx6.3, align 4
306 %tobool7.not.3 = icmp eq i32 %17, 0
307 br i1 %tobool7.not.3, label %land.end.3, label %land.rhs.3
309 land.rhs.3: ; preds = %for.body.lr.ph.3
310 %18 = load i8, ptr %arrayidx12.3, align 1
311 %tobool13.3 = zext i8 %18 to i32
314 land.end.3: ; preds = %land.rhs.3, %for.body.lr.ph.3
315 %19 = phi i32 [ 0, %for.body.lr.ph.3 ], [ %tobool13.3, %land.rhs.3 ]
316 %scevgep.3 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %19
317 %20 = sub nuw nsw i32 108, %19
318 call void @llvm.memset.p0.i32(ptr align 1 %scevgep.3, i8 0, i32 %20, i1 false)
319 br label %for.cond.backedge.3
321 for.cond.backedge.3: ; preds = %land.end.3, %for.cond.backedge.2
; Steps 1-3 of the %for.body.us loop. Each one optionally loads a byte from
; element 1 of %c, zeroes 108 - byte bytes at @arr_183[0][0][1][byte], and then
; tests element 1 of %d for the next step.
324 land.rhs.us.1: ; preds = %land.end.us
325 %21 = load i8, ptr %arrayidx12.us.1, align 1
326 %tobool13.us.1 = zext i8 %21 to i32
327 br label %land.end.us.1
329 land.end.us.1: ; preds = %land.rhs.us.1, %land.end.us
330 %22 = phi i32 [ 0, %land.end.us ], [ %tobool13.us.1, %land.rhs.us.1 ]
331 %scevgep45.1 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %22
332 %23 = sub nuw nsw i32 108, %22
333 call void @llvm.memset.p0.i32(ptr align 1 %scevgep45.1, i8 0, i32 %23, i1 false)
334 %24 = load i32, ptr %arrayidx6.us.2, align 4
335 %tobool7.not.us.2 = icmp eq i32 %24, 0
336 br i1 %tobool7.not.us.2, label %land.end.us.2, label %land.rhs.us.2
338 land.rhs.us.2: ; preds = %land.end.us.1
339 %25 = load i8, ptr %arrayidx12.us.2, align 1
340 %tobool13.us.2 = zext i8 %25 to i32
341 br label %land.end.us.2
343 land.end.us.2: ; preds = %land.rhs.us.2, %land.end.us.1
344 %26 = phi i32 [ 0, %land.end.us.1 ], [ %tobool13.us.2, %land.rhs.us.2 ]
345 %scevgep45.2 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %26
346 %27 = sub nuw nsw i32 108, %26
347 call void @llvm.memset.p0.i32(ptr align 1 %scevgep45.2, i8 0, i32 %27, i1 false)
348 %28 = load i32, ptr %arrayidx6.us.3, align 4
349 %tobool7.not.us.3 = icmp eq i32 %28, 0
350 br i1 %tobool7.not.us.3, label %land.end.us.3, label %land.rhs.us.3
352 land.rhs.us.3: ; preds = %land.end.us.2
353 %29 = load i8, ptr %arrayidx12.us.3, align 1
354 %tobool13.us.3 = zext i8 %29 to i32
355 br label %land.end.us.3
; Last memset of the iteration; the unconditional branch back to %for.body.us
; is the loop backedge (it feeds the value 1 into %conv44.us).
357 land.end.us.3: ; preds = %land.rhs.us.3, %land.end.us.2
358 %30 = phi i32 [ 0, %land.end.us.2 ], [ %tobool13.us.3, %land.rhs.us.3 ]
359 %scevgep45.3 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %30
360 %31 = sub nuw nsw i32 108, %30
361 call void @llvm.memset.p0.i32(ptr align 1 %scevgep45.3, i8 0, i32 %31, i1 false)
362 br label %for.body.us
; Memset intrinsic with a 32-bit length. Every call in @a passes a destination
; pointer into @arr_183, a fill byte of 0, a run-time length, and `i1 false`
; (the volatile flag per the LLVM Language Reference).
365 declare void @llvm.memset.p0.i32(ptr, i8, i32, i1)