1 ; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
2 ; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL
4 ; Not implemented as a MIR test so that changes to the generic HardwareLoops
5 ; pass can also be tested. These functions have been taken from
6 ; Transforms/HardwareLoops/loop-guards.ll, in which the generation of a few
7 ; test.set intrinsics can be seen, but only one (ne_trip_count) gets generated
8 ; here. Simplifications result in icmps changing and maybe also the CFG. So,
9 ; TODO: Teach the HardwareLoops pass some better pattern recognition.
11 ; CHECK-GLOBAL-NOT: DoLoopStart
12 ; CHECK-GLOBAL-NOT: WhileLoopStart
13 ; CHECK-GLOBAL-NOT: LoopEnd
; ne_and_guard: the trip-count test (%N != 0) is and-ed together with the two
; i1 arguments, so a single conditional branch in the entry block guards the
; loop. The expectations below look for a compare of $lr against zero, a t2DLS
; in the loop preheader and a t2LEUpdate that branches back to the loop body.
18 ; CHECK: t2CMPri renamable $lr, 0
20 ; CHECK: bb.1.while.body.preheader:
21 ; CHECK: $lr = t2DLS renamable $lr
22 ; CHECK: bb.2.while.body:
23 ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
24 define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
; Guard condition: %t1 && %t2 && (%N != 0), folded into one i1 value.
26 %brmerge.demorgan = and i1 %t1, %t2
27 %cmp6 = icmp ne i32 %N, 0
28 %or.cond = and i1 %brmerge.demorgan, %cmp6
29 br i1 %or.cond, label %while.body, label %if.end
; Copy loop: each iteration loads one i32 through the %b cursor, stores it
; through the %a cursor and advances both cursors by one element.
31 while.body: ; preds = %while.body, %entry
32 %i.09 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
33 %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %entry ]
34 %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %entry ]
35 %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
36 %tmp = load i32, i32* %b.addr.07, align 4
37 %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
38 store i32 %tmp, i32* %a.addr.08, align 4
; The counter starts at 0; the loop exits once the incremented value equals %N.
39 %inc = add nuw i32 %i.09, 1
40 %exitcond = icmp eq i32 %inc, %N
41 br i1 %exitcond, label %if.end, label %while.body
; Common exit reached both from the failed guard and from the loop.
43 if.end: ; preds = %while.body, %entry
; ne_preheader: the flag test (%t1 && %t2) is done in the entry block and the
; trip-count test (%N != 0) sits in its own block, %while.preheader, directly
; in front of the loop. The expectations below still look for a compare of $lr
; against zero followed by a t2DLS in the loop preheader.
47 ; TODO: This could generate WLS
51 ; CHECK: t2CMPri renamable $lr, 0
53 ; CHECK: bb.1.while.body.preheader:
54 ; CHECK: $lr = t2DLS renamable $lr
55 ; CHECK: bb.2.while.body:
56 ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
57 define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
; First guard: both i1 flags must be set to reach the preheader.
59 %brmerge.demorgan = and i1 %t1, %t2
60 br i1 %brmerge.demorgan, label %while.preheader, label %if.end
; Second guard: enter the loop only for a non-zero %N (icmp ne form).
62 while.preheader: ; preds = %entry
63 %cmp = icmp ne i32 %N, 0
64 br i1 %cmp, label %while.body, label %if.end
; Copy loop: load one i32 through the %b cursor, store it through the %a
; cursor, then advance both cursors by one element.
66 while.body: ; preds = %while.body, %while.preheader
67 %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
68 %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
69 %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
70 %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
71 %tmp = load i32, i32* %b.addr.07, align 4
72 %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
73 store i32 %tmp, i32* %a.addr.08, align 4
; The loop exits once the incremented counter equals %N.
74 %inc = add nuw i32 %i.09, 1
75 %exitcond = icmp eq i32 %inc, %N
76 br i1 %exitcond, label %if.end, label %while.body
; Common exit for both guards and for the loop.
78 if.end: ; preds = %while.body, %while.preheader, %entry
; eq_preheader: same shape as a guard placed in %while.preheader, but the
; trip-count test is written as (%N == 0) with the branch targets swapped, so
; a zero %N jumps straight to %if.end. The expectations below look for a
; compare of $lr against zero followed by a t2DLS in the loop preheader.
82 ; TODO: This could generate WLS
86 ; CHECK: t2CMPri renamable $lr, 0
88 ; CHECK: bb.1.while.body.preheader:
89 ; CHECK: $lr = t2DLS renamable $lr
90 ; CHECK: bb.2.while.body:
91 ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
92 define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
; First guard: both i1 flags must be set to reach the preheader.
94 %brmerge.demorgan = and i1 %t1, %t2
95 br i1 %brmerge.demorgan, label %while.preheader, label %if.end
; Second guard in icmp eq form: the true edge skips the loop.
97 while.preheader: ; preds = %entry
98 %cmp = icmp eq i32 %N, 0
99 br i1 %cmp, label %if.end, label %while.body
; Copy loop: load one i32 through the %b cursor, store it through the %a
; cursor, then advance both cursors by one element.
101 while.body: ; preds = %while.body, %while.preheader
102 %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
103 %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
104 %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
105 %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
106 %tmp = load i32, i32* %b.addr.07, align 4
107 %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
108 store i32 %tmp, i32* %a.addr.08, align 4
; The loop exits once the incremented counter equals %N.
109 %inc = add nuw i32 %i.09, 1
110 %exitcond = icmp eq i32 %inc, %N
111 br i1 %exitcond, label %if.end, label %while.body
; Common exit for both guards and for the loop.
113 if.end: ; preds = %while.body, %while.preheader, %entry
; ne_prepreheader: the trip-count test (%N != 0) is performed first, in the
; entry block, and the flag test (%t1 && %t2) sits between it and the loop in
; %while.preheader. The expectations below look for a compare of $lr against
; zero followed by a t2DLS in the loop preheader.
117 ; TODO: This could generate WLS
118 ; CHECK: ne_prepreheader
121 ; CHECK: t2CMPri renamable $lr, 0
123 ; CHECK: bb.1.while.body.preheader:
124 ; CHECK: $lr = t2DLS renamable $lr
125 ; CHECK: bb.2.while.body:
126 ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
127 define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
; Trip-count guard comes first: a zero %N goes straight to %if.end.
129 %cmp = icmp ne i32 %N, 0
130 br i1 %cmp, label %while.preheader, label %if.end
; Flag guard is the block immediately in front of the loop.
132 while.preheader: ; preds = %entry
133 %brmerge.demorgan = and i1 %t1, %t2
134 br i1 %brmerge.demorgan, label %while.body, label %if.end
; Copy loop: load one i32 through the %b cursor, store it through the %a
; cursor, then advance both cursors by one element.
136 while.body: ; preds = %while.body, %while.preheader
137 %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
138 %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
139 %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
140 %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
141 %tmp = load i32, i32* %b.addr.07, align 4
142 %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
143 store i32 %tmp, i32* %a.addr.08, align 4
; The loop exits once the incremented counter equals %N.
144 %inc = add nuw i32 %i.09, 1
145 %exitcond = icmp eq i32 %inc, %N
146 br i1 %exitcond, label %if.end, label %while.body
; Common exit for both guards and for the loop.
148 if.end: ; preds = %while.body, %while.preheader, %entry
; be_ne: the loop is guarded by a test on %be, a value selected from the
; (%N != 0) comparison, rather than by a test on %N itself. The expectations
; below look for a t2DLS followed by the loop in bb.1.do.body ending in a
; t2LEUpdate that branches back to bb.1.
155 ; CHECK: $lr = t2DLS renamable $lr
156 ; CHECK: bb.1.do.body:
157 ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
158 define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
160 %cmp = icmp ne i32 %N, 0
; %be is 0 when %N != 0 and %sub otherwise.
; NOTE(review): no definition of %sub is visible in this excerpt; confirm that
; it is defined before this use (presumably derived from %N).
162 %be = select i1 %cmp, i32 0, i32 %sub
; Guard: enter the loop only when %be is non-zero.
163 %cmp.1 = icmp ne i32 %be, 0
164 br i1 %cmp.1, label %do.body, label %if.end
; Copy loop: load one i32 through the %b cursor, store it through the %a
; cursor, then advance both cursors by one element.
166 do.body: ; preds = %do.body, %entry
167 %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %entry ]
168 %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %entry ]
169 %i.0 = phi i32 [ %inc, %do.body ], [ 0, %entry ]
170 %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
171 %tmp = load i32, i32* %b.addr.0, align 4
172 %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
173 store i32 %tmp, i32* %a.addr.0, align 4
; Latch uses an unsigned less-than test: keep looping while %inc < %N.
174 %inc = add nuw i32 %i.0, 1
175 %cmp.2 = icmp ult i32 %inc, %N
176 br i1 %cmp.2, label %do.body, label %if.end
; Common exit for the guard and for the loop.
178 if.end: ; preds = %do.body, %entry
; ne_trip_count: the only function in this file whose expectations look for a
; t2WLS (while-loop start, branching to bb.3 when no iteration is needed)
; instead of a t2DLS. The guard is a plain (%N != 0) test with no other
; condition mixed in.
182 ; TODO: Remove the tMOVr in the preheader!
183 ; CHECK: ne_trip_count
186 ; CHECK: $lr = t2WLS $r3, %bb.3
187 ; CHECK: bb.1.do.body.preheader:
189 ; CHECK: bb.2.do.body:
190 ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
191 define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
; Unconditional jump to the block that holds the trip-count guard.
; NOTE(review): the do.body.preheader and do.body labels are not visible in
; this excerpt; the phi nodes below name them as predecessors, so confirm that
; they are present in the full file.
193 br label %do.body.preheader
; Guard: enter the loop only for a non-zero %N.
196 %cmp = icmp ne i32 %N, 0
197 br i1 %cmp, label %do.body, label %if.end
; Copy loop: load one i32 through the %b cursor, store it through the %a
; cursor, then advance both cursors by one element.
200 %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %do.body.preheader ]
201 %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %do.body.preheader ]
202 %i.0 = phi i32 [ %inc, %do.body ], [ 0, %do.body.preheader ]
203 %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
204 %tmp = load i32, i32* %b.addr.0, align 4
205 %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
206 store i32 %tmp, i32* %a.addr.0, align 4
; Latch uses an unsigned less-than test: keep looping while %inc < %N.
207 %inc = add nuw i32 %i.0, 1
208 %cmp.1 = icmp ult i32 %inc, %N
209 br i1 %cmp.1, label %do.body, label %if.end
; Common exit for the guard and for the loop.
211 if.end: ; preds = %do.body, %entry