1 ; RUN: llc -mtriple=thumbv7em -mattr=+fp-armv8 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT
2 ; RUN: llc -mtriple=thumbv8m.main -mattr=+fp-armv8,+dsp %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT
3 ; RUN: llc -mtriple=thumbv8m.main -mattr=+fp-armv8,+dsp -lsr-backedge-indexing=false %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLED
4 ; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLED
5 ; RUN: llc -mtriple=thumbv8m.main -mattr=+fp-armv8,+dsp -lsr-complexity-limit=2147483647 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-COMPLEX
7 ; CHECK-LABEL: test_qadd_2
10 ; CHECK-DEFAULT: ldr{{.*}}, #4]
11 ; CHECK-DEFAULT: ldr{{.*}}, #4]
12 ; CHECK-DEFAULT: str{{.*}}, #4]
13 ; CHECK-DEFAULT: ldr{{.*}}, #8]!
14 ; CHECK-DEFAULT: ldr{{.*}}, #8]!
15 ; CHECK-DEFAULT: str{{.*}}, #8]!
17 ; CHECK-COMPLEX: ldr{{.*}}, #8]!
18 ; CHECK-COMPLEX: ldr{{.*}}, #8]!
19 ; CHECK-COMPLEX: str{{.*}}, #8]!
20 ; CHECK-COMPLEX: ldr{{.*}}, #4]
21 ; CHECK-COMPLEX: ldr{{.*}}, #4]
22 ; CHECK-COMPLEX: str{{.*}}, #4]
24 ; DISABLED-NOT: ldr{{.*}}]!
25 ; DISABLED-NOT: str{{.*}}]!
27 define void @test_qadd_2(i32* %a.array, i32* %b.array, i32* %out.array, i32 %N) {
32 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
33 %idx.1 = phi i32 [ 0, %entry ], [ %idx.next, %loop ]
34 %gep.a.1 = getelementptr inbounds i32, i32* %a.array, i32 %idx.1
35 %a.1 = load i32, i32* %gep.a.1
36 %gep.b.1 = getelementptr inbounds i32, i32* %b.array, i32 %idx.1
37 %b.1 = load i32, i32* %gep.b.1
38 %qadd.1 = call i32 @llvm.arm.qadd(i32 %a.1, i32 %b.1)
39 %addr.1 = getelementptr inbounds i32, i32* %out.array, i32 %idx.1
40 store i32 %qadd.1, i32* %addr.1
41 %idx.2 = or i32 %idx.1, 1
42 %gep.a.2 = getelementptr inbounds i32, i32* %a.array, i32 %idx.2
43 %a.2 = load i32, i32* %gep.a.2
44 %gep.b.2 = getelementptr inbounds i32, i32* %b.array, i32 %idx.2
45 %b.2 = load i32, i32* %gep.b.2
46 %qadd.2 = call i32 @llvm.arm.qadd(i32 %a.2, i32 %b.2)
47 %addr.2 = getelementptr inbounds i32, i32* %out.array, i32 %idx.2
48 store i32 %qadd.2, i32* %addr.2
49 %i.next = add nsw nuw i32 %i, -2
50 %idx.next = add nsw nuw i32 %idx.1, 2
51 %cmp = icmp ult i32 %i.next, %N
52 br i1 %cmp, label %loop, label %exit
58 ; CHECK-LABEL: test_qadd_2_backwards
59 ; TODO: Indexes should be generated.
63 ; CHECK-DEFAULT: ldr{{.*}},
64 ; CHECK-DEFAULT: ldr{{.*}},
65 ; CHECK-DEFAULT: str{{.*}},
66 ; CHECK-DEFAULT: ldr{{.*}}, #-4]
67 ; CHECK-DEFAULT: ldr{{.*}}, #-4]
68 ; CHECK-DEFAULT: sub{{.*}}, #8
69 ; CHECK-DEFAULT: str{{.*}}, #-4]
70 ; CHECK-DEFAULT: sub{{.*}}, #8
72 ; CHECK-COMPLEX: ldr{{.*}} lsl #2]
73 ; CHECK-COMPLEX: ldr{{.*}} lsl #2]
74 ; CHECK-COMPLEX: str{{.*}} lsl #2]
75 ; CHECK-COMPLEX: ldr{{.*}} lsl #2]
76 ; CHECK-COMPLEX: ldr{{.*}} lsl #2]
77 ; CHECK-COMPLEX: str{{.*}} lsl #2]
79 ; DISABLED-NOT: ldr{{.*}}]!
80 ; DISABLED-NOT: str{{.*}}]!
82 define void @test_qadd_2_backwards(i32* %a.array, i32* %b.array, i32* %out.array, i32 %N) {
87 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
88 %idx.1 = phi i32 [ %N, %entry ], [ %idx.next, %loop ]
89 %gep.a.1 = getelementptr inbounds i32, i32* %a.array, i32 %idx.1
90 %a.1 = load i32, i32* %gep.a.1
91 %gep.b.1 = getelementptr inbounds i32, i32* %b.array, i32 %idx.1
92 %b.1 = load i32, i32* %gep.b.1
93 %qadd.1 = call i32 @llvm.arm.qadd(i32 %a.1, i32 %b.1)
94 %addr.1 = getelementptr inbounds i32, i32* %out.array, i32 %idx.1
95 store i32 %qadd.1, i32* %addr.1
96 %idx.2 = sub nsw nuw i32 %idx.1, 1
97 %gep.a.2 = getelementptr inbounds i32, i32* %a.array, i32 %idx.2
98 %a.2 = load i32, i32* %gep.a.2
99 %gep.b.2 = getelementptr inbounds i32, i32* %b.array, i32 %idx.2
100 %b.2 = load i32, i32* %gep.b.2
101 %qadd.2 = call i32 @llvm.arm.qadd(i32 %a.2, i32 %b.2)
102 %addr.2 = getelementptr inbounds i32, i32* %out.array, i32 %idx.2
103 store i32 %qadd.2, i32* %addr.2
104 %i.next = add nsw nuw i32 %i, -2
105 %idx.next = sub nsw nuw i32 %idx.1, 2
106 %cmp = icmp ult i32 %i.next, %N
107 br i1 %cmp, label %loop, label %exit
113 ; CHECK-LABEL: test_qadd_3
116 ; CHECK-DEFAULT: ldr{{.*}}, #8]
117 ; CHECK-DEFAULT: ldr{{.*}}, #8]
118 ; CHECK-DEFAULT: str{{.*}}, #8]
119 ; CHECK-DEFAULT: ldr{{.*}}, #12]!
120 ; CHECK-DEFAULT: ldr{{.*}}, #12]!
121 ; CHECK-DEFAULT: str{{.*}}, #12]!
123 ; CHECK-COMPLEX: ldr{{.*}}, #12]!
124 ; CHECK-COMPLEX: ldr{{.*}}, #12]!
125 ; CHECK-COMPLEX: str{{.*}}, #12]!
126 ; CHECK-COMPLEX: ldr{{.*}}, #4]
127 ; CHECK-COMPLEX: ldr{{.*}}, #4]
128 ; CHECK-COMPLEX: str{{.*}}, #4]
129 ; CHECK-COMPLEX: ldr{{.*}}, #8]
130 ; CHECK-COMPLEX: ldr{{.*}}, #8]
131 ; CHECK-COMPLEX: str{{.*}}, #8]
133 ; DISABLED-NOT: ldr{{.*}}]!
134 ; DISABLED-NOT: str{{.*}}]!
136 define void @test_qadd_3(i32* %a.array, i32* %b.array, i32* %out.array, i32 %N) {
141 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
142 %idx.1 = phi i32 [ 0, %entry ], [ %idx.next, %loop ]
143 %gep.a.1 = getelementptr inbounds i32, i32* %a.array, i32 %idx.1
144 %a.1 = load i32, i32* %gep.a.1
145 %gep.b.1 = getelementptr inbounds i32, i32* %b.array, i32 %idx.1
146 %b.1 = load i32, i32* %gep.b.1
147 %qadd.1 = call i32 @llvm.arm.qadd(i32 %a.1, i32 %b.1)
148 %addr.1 = getelementptr inbounds i32, i32* %out.array, i32 %idx.1
149 store i32 %qadd.1, i32* %addr.1
150 %idx.2 = add nuw nsw i32 %idx.1, 1
151 %gep.a.2 = getelementptr inbounds i32, i32* %a.array, i32 %idx.2
152 %a.2 = load i32, i32* %gep.a.2
153 %gep.b.2 = getelementptr inbounds i32, i32* %b.array, i32 %idx.2
154 %b.2 = load i32, i32* %gep.b.2
155 %qadd.2 = call i32 @llvm.arm.qadd(i32 %a.2, i32 %b.2)
156 %addr.2 = getelementptr inbounds i32, i32* %out.array, i32 %idx.2
157 store i32 %qadd.2, i32* %addr.2
158 %idx.3 = add nuw nsw i32 %idx.1, 2
159 %gep.a.3 = getelementptr inbounds i32, i32* %a.array, i32 %idx.3
160 %a.3 = load i32, i32* %gep.a.3
161 %gep.b.3 = getelementptr inbounds i32, i32* %b.array, i32 %idx.3
162 %b.3 = load i32, i32* %gep.b.3
163 %qadd.3 = call i32 @llvm.arm.qadd(i32 %a.3, i32 %b.3)
164 %addr.3 = getelementptr inbounds i32, i32* %out.array, i32 %idx.3
165 store i32 %qadd.3, i32* %addr.3
166 %i.next = add nsw nuw i32 %i, -3
167 %idx.next = add nsw nuw i32 %idx.1, 3
168 %cmp = icmp ult i32 %i.next, %N
169 br i1 %cmp, label %loop, label %exit
175 ; CHECK-LABEL: test_qadd_4
178 ; TODO: pre-inc store
180 ; CHECK-DEFAULT: ldr{{.*}}, #4]
181 ; CHECK-DEFAULT: ldr{{.*}}, #4]
182 ; CHECK-DEFAULT: str{{.*}}, #4]
183 ; CHECK-DEFAULT: ldr{{.*}}, #8]
184 ; CHECK-DEFAULT: ldr{{.*}}, #8]
185 ; CHECK-DEFAULT: str{{.*}}, #8]
186 ; CHECK-DEFAULT: ldr{{.*}}, #12]
187 ; CHECK-DEFAULT: ldr{{.*}}, #12]
188 ; CHECK-DEFAULT: str{{.*}}, #12]
190 ; CHECK-COMPLEX: ldr{{.*}}, #16]!
191 ; CHECK-COMPLEX: ldr{{.*}}, #16]!
192 ; CHECK-COMPLEX: str{{.*}}, #16]!
193 ; CHECK-COMPLEX: ldr{{.*}}, #4]
194 ; CHECK-COMPLEX: ldr{{.*}}, #4]
195 ; CHECK-COMPLEX: str{{.*}}, #4]
196 ; CHECK-COMPLEX: ldr{{.*}}, #8]
197 ; CHECK-COMPLEX: ldr{{.*}}, #8]
198 ; CHECK-COMPLEX: str{{.*}}, #8]
199 ; CHECK-COMPLEX: ldr{{.*}}, #12]
200 ; CHECK-COMPLEX: ldr{{.*}}, #12]
201 ; CHECK-COMPLEX: str{{.*}}, #12]
203 ; DISABLED-NOT: ldr{{.*}}]!
204 ; DISABLED-NOT: str{{.*}}]!
206 define void @test_qadd_4(i32* %a.array, i32* %b.array, i32* %out.array, i32 %N) {
211 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
212 %idx.1 = phi i32 [ 0, %entry ], [ %idx.next, %loop ]
213 %gep.a.1 = getelementptr inbounds i32, i32* %a.array, i32 %idx.1
214 %a.1 = load i32, i32* %gep.a.1
215 %gep.b.1 = getelementptr inbounds i32, i32* %b.array, i32 %idx.1
216 %b.1 = load i32, i32* %gep.b.1
217 %qadd.1 = call i32 @llvm.arm.qadd(i32 %a.1, i32 %b.1)
218 %addr.1 = getelementptr inbounds i32, i32* %out.array, i32 %idx.1
219 store i32 %qadd.1, i32* %addr.1
220 %idx.2 = or i32 %idx.1, 1
221 %gep.a.2 = getelementptr inbounds i32, i32* %a.array, i32 %idx.2
222 %a.2 = load i32, i32* %gep.a.2
223 %gep.b.2 = getelementptr inbounds i32, i32* %b.array, i32 %idx.2
224 %b.2 = load i32, i32* %gep.b.2
225 %qadd.2 = call i32 @llvm.arm.qadd(i32 %a.2, i32 %b.2)
226 %addr.2 = getelementptr inbounds i32, i32* %out.array, i32 %idx.2
227 store i32 %qadd.2, i32* %addr.2
228 %idx.3 = or i32 %idx.1, 2
229 %gep.a.3 = getelementptr inbounds i32, i32* %a.array, i32 %idx.3
230 %a.3 = load i32, i32* %gep.a.3
231 %gep.b.3 = getelementptr inbounds i32, i32* %b.array, i32 %idx.3
232 %b.3 = load i32, i32* %gep.b.3
233 %qadd.3 = call i32 @llvm.arm.qadd(i32 %a.3, i32 %b.3)
234 %addr.3 = getelementptr inbounds i32, i32* %out.array, i32 %idx.3
235 store i32 %qadd.3, i32* %addr.3
236 %idx.4 = or i32 %idx.1, 3
237 %gep.a.4 = getelementptr inbounds i32, i32* %a.array, i32 %idx.4
238 %a.4 = load i32, i32* %gep.a.4
239 %gep.b.4 = getelementptr inbounds i32, i32* %b.array, i32 %idx.4
240 %b.4 = load i32, i32* %gep.b.4
241 %qadd.4 = call i32 @llvm.arm.qadd(i32 %a.4, i32 %b.4)
242 %addr.4 = getelementptr inbounds i32, i32* %out.array, i32 %idx.4
243 store i32 %qadd.4, i32* %addr.4
244 %i.next = add nsw nuw i32 %i, -4
245 %idx.next = add nsw nuw i32 %idx.1, 4
246 %cmp = icmp ult i32 %i.next, %N
247 br i1 %cmp, label %loop, label %exit
253 ; CHECK-LABEL: test_qadd16_2
255 ; TODO: pre-inc store.
257 ; CHECK-DEFAULT: ldr{{.*}}, #4]
258 ; CHECK-DEFAULT: ldr{{.*}}, #4]
259 ; CHECK-DEFAULT: str{{.*}}, #8]
260 ; CHECK-DEFAULT: ldr{{.*}}, #8]!
261 ; CHECK-DEFAULT: ldr{{.*}}, #8]!
262 ; CHECK-DEFAULT: str{{.*}}, #16]!
264 ; CHECK-COMPLEX: ldr{{.*}}, #8]!
265 ; CHECK-COMPLEX: ldr{{.*}}, #8]!
266 ; CHECK-COMPLEX: str{{.*}}, #16]!
267 ; CHECK-COMPLEX: ldr{{.*}}, #4]
268 ; CHECK-COMPLEX: ldr{{.*}}, #4]
269 ; CHECK-COMPLEX: str{{.*}}, #8]
271 ; DISABLED-NOT: ldr{{.*}}]!
272 ; DISABLED-NOT: str{{.*}}]!
274 define void @test_qadd16_2(i16* %a.array, i16* %b.array, i32* %out.array, i32 %N) {
279 %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
280 %idx.1 = phi i32 [ 0, %entry ], [ %idx.next, %loop ]
281 %gep.a.1 = getelementptr inbounds i16, i16* %a.array, i32 %idx.1
282 %cast.a.1 = bitcast i16* %gep.a.1 to i32*
283 %a.1 = load i32, i32* %cast.a.1
284 %gep.b.1 = getelementptr inbounds i16, i16* %b.array, i32 %idx.1
285 %cast.b.1 = bitcast i16* %gep.b.1 to i32*
286 %b.1 = load i32, i32* %cast.b.1
287 %qadd.1 = call i32 @llvm.arm.qadd16(i32 %a.1, i32 %b.1)
288 %addr.1 = getelementptr inbounds i32, i32* %out.array, i32 %idx.1
289 store i32 %qadd.1, i32* %addr.1
290 %idx.2 = add nsw nuw i32 %idx.1, 2
291 %gep.a.2 = getelementptr inbounds i16, i16* %a.array, i32 %idx.2
292 %cast.a.2 = bitcast i16* %gep.a.2 to i32*
293 %a.2 = load i32, i32* %cast.a.2
294 %gep.b.2 = getelementptr inbounds i16, i16* %b.array, i32 %idx.2
295 %cast.b.2 = bitcast i16* %gep.b.2 to i32*
296 %b.2 = load i32, i32* %cast.b.2
297 %qadd.2 = call i32 @llvm.arm.qadd16(i32 %a.2, i32 %b.2)
298 %addr.2 = getelementptr inbounds i32, i32* %out.array, i32 %idx.2
299 store i32 %qadd.2, i32* %addr.2
300 %i.next = add nsw nuw i32 %i, -2
301 %idx.next = add nsw nuw i32 %idx.1, 4
302 %cmp = icmp ult i32 %i.next, %N
303 br i1 %cmp, label %loop, label %exit
309 declare i32 @llvm.arm.qadd(i32, i32)
310 declare i32 @llvm.arm.qadd16(i32, i32)